authorJakub Kicinski <kuba@kernel.org>2026-03-18 19:08:49 -0700
committerJakub Kicinski <kuba@kernel.org>2026-03-18 19:08:50 -0700
commit76eea68d5fe5c6474b4f2f63f785fd9f12789f5c (patch)
tree934483890bdee8e0d8f8d5bc633fa987dfdf61a1 /drivers/net
parentd5516452a362aab2c136ab815967c4417c92d228 (diff)
parent4dd2115f43594da5271a1aa34fde6719b4259047 (diff)
Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
Tariq Toukan says:

====================
mlx5-next updates 2026-03-17

The following pull-request contains common mlx5 updates.

* 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux:
  net/mlx5: Expose MLX5_UMR_ALIGN definition
  {net/RDMA}/mlx5: Add LAG demux table API and vport demux rules
  net/mlx5: Add VHCA RX flow destination support for FW steering
  net/mlx5: LAG, replace mlx5_get_dev_index with LAG sequence number
  net/mlx5: E-switch, modify peer miss rule index to vhca_id
  net/mlx5: LAG, use xa_alloc to manage LAG device indices
  net/mlx5: LAG, replace pf array with xarray
  net/mlx5: Add silent mode set/query and VHCA RX IFC bits
  net/mlx5: Add IFC bits for shared headroom pool PBMC support
  net/mlx5: Expose TLP emulation capabilities
  net/mlx5: Add TLP emulation device capabilities
====================

Link: https://patch.msgid.link/20260317075844.12066-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
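The recurring theme in the LAG patches below is the switch from mlx5_get_dev_index()-based port indexing to LAG-local sequence numbers. As a sketch (not part of the diff), a caller that previously indexed per-peer state by device index would now do roughly the following; per the semantics documented in lag.c, the master device always maps to sequence 0 and the remaining members to 1..ports-1:

	/* Sketch only, assuming an active LAG: mlx5_lag_get_dev_seq() returns a
	 * stable per-LAG slot (master == 0), or -ENOENT when the device is not
	 * part of a LAG, so callers must check for a negative return.
	 */
	static int example_peer_slot(struct mlx5_core_dev *mdev)
	{
		int seq = mlx5_lag_get_dev_seq(mdev);

		if (seq < 0)
			return seq;	/* not in a LAG */

		return seq;	/* safe index into peer_flows[]-style arrays */
	}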
Diffstat (limited to 'drivers/net')
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c |   3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c              |   9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.h            |  14
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c   | 103
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c             |   6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c            |  17
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fw.c                 |   6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c        |   3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c            | 684
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h            |  49
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c             |  20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c          |  15
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c       |  28
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c             |   2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c               |   1
15 files changed, 778 insertions, 182 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 6d73127b7217..2cf1d3825def 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -282,6 +282,9 @@ const char *parse_fs_dst(struct trace_seq *p,
case MLX5_FLOW_DESTINATION_TYPE_NONE:
trace_seq_printf(p, "none\n");
break;
+ case MLX5_FLOW_DESTINATION_TYPE_VHCA_RX:
+ trace_seq_printf(p, "rx_vhca_id=%u\n", dst->vhca.id);
+ break;
}
trace_seq_putc(p, 0);
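The new MLX5_FLOW_DESTINATION_TYPE_VHCA_RX printed above threads through the fs_cmd.c and fs_core.c hunks below. A minimal sketch of building such a destination, using only structures and flags that appear in this diff (the flow table and match spec setup are assumed):

	/* Sketch: forward matched traffic into another function's RX domain,
	 * identified by its vhca_id, as done by the LAG demux rules below.
	 */
	static struct mlx5_flow_handle *
	example_fwd_to_vhca_rx(struct mlx5_flow_table *ft,
			       struct mlx5_flow_spec *spec, u16 vhca_id)
	{
		struct mlx5_flow_destination dest = {};
		struct mlx5_flow_act flow_act = {};

		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
		dest.type = MLX5_FLOW_DESTINATION_TYPE_VHCA_RX;
		dest.vhca.id = vhca_id;

		return mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
	}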
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 1434b65d4746..397a93584fd6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -35,6 +35,7 @@
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/lag.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
@@ -2131,7 +2132,7 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow,
mutex_unlock(&esw->offloads.peer_mutex);
list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) {
- if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev))
+ if (peer_index != mlx5_lag_get_dev_seq(peer_flow->priv->mdev))
continue;
list_del(&peer_flow->peer_flows);
@@ -2154,7 +2155,7 @@ static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow)
devcom = flow->priv->mdev->priv.eswitch->devcom;
mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) {
- i = mlx5_get_dev_index(peer_esw->dev);
+ i = mlx5_lag_get_dev_seq(peer_esw->dev);
mlx5e_tc_del_fdb_peer_flow(flow, i);
}
}
@@ -4584,7 +4585,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
struct mlx5e_tc_flow_parse_attr *parse_attr;
- int i = mlx5_get_dev_index(peer_esw->dev);
+ int i = mlx5_lag_get_dev_seq(peer_esw->dev);
struct mlx5e_rep_priv *peer_urpriv;
struct mlx5e_tc_flow *peer_flow;
struct mlx5_core_dev *in_mdev;
@@ -5525,7 +5526,7 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
devcom = esw->devcom;
mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) {
- i = mlx5_get_dev_index(peer_esw->dev);
+ i = mlx5_lag_get_dev_seq(peer_esw->dev);
list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i])
mlx5e_tc_del_fdb_peers_flow(flow);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index c2563bee74df..5128f5020dae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -273,7 +273,7 @@ struct mlx5_eswitch_fdb {
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *send_to_vport_meta_grp;
struct mlx5_flow_group *peer_miss_grp;
- struct mlx5_flow_handle **peer_miss_rules[MLX5_MAX_PORTS];
+ struct xarray peer_miss_rules;
struct mlx5_flow_group *miss_grp;
struct mlx5_flow_handle **send_to_vport_meta_rules;
struct mlx5_flow_handle *miss_rule_uni;
@@ -942,6 +942,12 @@ int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev,
u16 vport_num);
bool mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev);
void mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev);
+struct mlx5_flow_group *
+mlx5_esw_lag_demux_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ft);
+struct mlx5_flow_handle *
+mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num,
+ struct mlx5_flow_table *lag_ft);
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -1027,6 +1033,12 @@ mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id)
static inline void
mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev) {}
+static inline struct mlx5_flow_handle *
+mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num,
+ struct mlx5_flow_table *lag_ft)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
#endif /* CONFIG_MLX5_ESWITCH */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 7a9ee36b8dca..bcde267df8eb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1190,7 +1190,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *flow;
struct mlx5_vport *peer_vport;
struct mlx5_flow_spec *spec;
- int err, pfindex;
+ int err;
unsigned long i;
void *misc;
@@ -1270,14 +1270,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
}
}
- pfindex = mlx5_get_dev_index(peer_dev);
- if (pfindex >= MLX5_MAX_PORTS) {
- esw_warn(esw->dev, "Peer dev index(%d) is over the max num defined(%d)\n",
- pfindex, MLX5_MAX_PORTS);
- err = -EINVAL;
+ err = xa_insert(&esw->fdb_table.offloads.peer_miss_rules,
+ MLX5_CAP_GEN(peer_dev, vhca_id), flows, GFP_KERNEL);
+ if (err)
goto add_ec_vf_flow_err;
- }
- esw->fdb_table.offloads.peer_miss_rules[pfindex] = flows;
kvfree(spec);
return 0;
@@ -1319,12 +1315,13 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
struct mlx5_core_dev *peer_dev)
{
struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch;
- u16 peer_index = mlx5_get_dev_index(peer_dev);
+ u16 peer_vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
struct mlx5_flow_handle **flows;
struct mlx5_vport *peer_vport;
unsigned long i;
- flows = esw->fdb_table.offloads.peer_miss_rules[peer_index];
+ flows = xa_erase(&esw->fdb_table.offloads.peer_miss_rules,
+ peer_vhca_id);
if (!flows)
return;
@@ -1350,7 +1347,6 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
}
kvfree(flows);
- esw->fdb_table.offloads.peer_miss_rules[peer_index] = NULL;
}
static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
@@ -1460,6 +1456,83 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
return flow_rule;
}
+struct mlx5_flow_group *
+mlx5_esw_lag_demux_fg_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ft)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ void *match_criteria;
+ void *flow_group_in;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (IS_ERR(ft))
+ return ERR_CAST(ft);
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return ERR_PTR(-ENOMEM);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ ft->max_fte - 1);
+
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+
+ fg = mlx5_create_flow_group(ft, flow_group_in);
+ kvfree(flow_group_in);
+ if (IS_ERR(fg))
+ esw_warn(esw->dev, "Can't create LAG demux flow group\n");
+
+ return fg;
+}
+
+struct mlx5_flow_handle *
+mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num,
+ struct mlx5_flow_table *lag_ft)
+{
+ struct mlx5_flow_spec *spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *ret;
+ void *misc;
+
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ kvfree(spec);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VHCA_RX;
+ dest.vhca.id = MLX5_CAP_GEN(esw->dev, vhca_id);
+
+ ret = mlx5_add_flow_rules(lag_ft, spec, &flow_act, &dest, 1);
+ kvfree(spec);
+ return ret;
+}
+
#define MAX_PF_SQ 256
#define MAX_SQ_NVPORTS 32
@@ -2048,7 +2121,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
if (IS_ERR(g)) {
err = PTR_ERR(g);
- mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err);
+ esw_warn(esw->dev, "Failed to create vport rx group err %d\n",
+ err);
goto out;
}
@@ -2093,7 +2167,8 @@ static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw)
if (IS_ERR(g)) {
err = PTR_ERR(g);
- mlx5_core_warn(esw->dev, "Failed to create vport rx drop group err %d\n", err);
+ esw_warn(esw->dev,
+ "Failed to create vport rx drop group err %d\n", err);
goto out;
}
@@ -3247,6 +3322,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw,
return;
xa_init(&esw->paired);
+ xa_init(&esw->fdb_table.offloads.peer_miss_rules);
esw->num_peers = 0;
esw->devcom = mlx5_devcom_register_component(esw->dev->priv.devc,
MLX5_DEVCOM_ESW_OFFLOADS,
@@ -3274,6 +3350,7 @@ void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
mlx5_devcom_unregister_component(esw->devcom);
xa_destroy(&esw->paired);
+ xa_destroy(&esw->fdb_table.offloads.peer_miss_rules);
esw->devcom = NULL;
}
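With the hunks above, peer-miss rules are keyed by the peer's vhca_id in an xarray rather than by a bounded pf-index array. A short sketch of the resulting lifecycle (the flows allocation and error handling are assumed):

	/* Sketch: xa_insert() refuses duplicate keys (-EBUSY), which replaces
	 * the old MLX5_MAX_PORTS bounds check; on teardown, xa_erase() both
	 * looks up and unlinks the entry, so no NULL reset is needed.
	 */
	static int example_store_peer_rules(struct mlx5_eswitch *esw,
					    struct mlx5_core_dev *peer_dev,
					    struct mlx5_flow_handle **flows)
	{
		return xa_insert(&esw->fdb_table.offloads.peer_miss_rules,
				 MLX5_CAP_GEN(peer_dev, vhca_id), flows,
				 GFP_KERNEL);
	}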
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index c348ee62cd3a..1cd4cd898ec2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -716,6 +716,10 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
id = dst->dest_attr.ft->id;
ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE;
break;
+ case MLX5_FLOW_DESTINATION_TYPE_VHCA_RX:
+ id = dst->dest_attr.vhca.id;
+ ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VHCA_RX;
+ break;
default:
id = dst->dest_attr.tir_num;
ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR;
@@ -1183,7 +1187,7 @@ int mlx5_fs_cmd_set_l2table_entry_silent(struct mlx5_core_dev *dev, u8 silent_mo
{
u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {};
- if (silent_mode && !MLX5_CAP_GEN(dev, silent_mode))
+ if (silent_mode && !MLX5_CAP_GEN(dev, silent_mode_set))
return -EOPNOTSUPP;
MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 2c3544880a30..61a6ba1e49dd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -503,7 +503,8 @@ static bool is_fwd_dest_type(enum mlx5_flow_destination_type type)
type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER ||
type == MLX5_FLOW_DESTINATION_TYPE_TIR ||
type == MLX5_FLOW_DESTINATION_TYPE_RANGE ||
- type == MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE;
+ type == MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE ||
+ type == MLX5_FLOW_DESTINATION_TYPE_VHCA_RX;
}
static bool check_valid_spec(const struct mlx5_flow_spec *spec)
@@ -1437,15 +1438,9 @@ mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
struct mlx5_flow_table*
mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
- int prio, u32 level)
+ struct mlx5_flow_table_attr *ft_attr)
{
- struct mlx5_flow_table_attr ft_attr = {};
-
- ft_attr.level = level;
- ft_attr.prio = prio;
- ft_attr.max_fte = 1;
-
- return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
+ return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
}
EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
@@ -1890,7 +1885,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
d1->range.hit_ft == d2->range.hit_ft &&
d1->range.miss_ft == d2->range.miss_ft &&
d1->range.min == d2->range.min &&
- d1->range.max == d2->range.max))
+ d1->range.max == d2->range.max) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_VHCA_RX &&
+ d1->vhca.id == d2->vhca.id))
return true;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index eeb4437975f2..55249f405841 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -255,6 +255,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN(dev, tlp_device_emulation_manager)) {
+ err = mlx5_core_get_caps_mode(dev, MLX5_CAP_TLP_EMULATION, HCA_CAP_OPMOD_GET_CUR);
+ if (err)
+ return err;
+ }
+
if (MLX5_CAP_GEN(dev, ipsec_offload)) {
err = mlx5_core_get_caps_mode(dev, MLX5_CAP_IPSEC, HCA_CAP_OPMOD_GET_CUR);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
index 62b6faa4276a..37de4be0e620 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
@@ -145,7 +145,8 @@ static int members_show(struct seq_file *file, void *priv)
ldev = mlx5_lag_dev(dev);
mutex_lock(&ldev->lock);
mlx5_ldev_for_each(i, 0, ldev)
- seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device));
+ seq_printf(file, "%s\n",
+ dev_name(mlx5_lag_pf(ldev, i)->dev->device));
mutex_unlock(&ldev->lock);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 044adfdf9aa2..449e4bd86c06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -35,6 +35,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
+#include <linux/mlx5/lag.h>
#include "lib/mlx5.h"
#include "lib/devcom.h"
#include "mlx5_core.h"
@@ -232,6 +233,7 @@ static void mlx5_do_bond_work(struct work_struct *work);
static void mlx5_ldev_free(struct kref *ref)
{
struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
+ struct lag_func *pf;
struct net *net;
int i;
@@ -241,13 +243,16 @@ static void mlx5_ldev_free(struct kref *ref)
}
mlx5_ldev_for_each(i, 0, ldev) {
- if (ldev->pf[i].dev &&
- ldev->pf[i].port_change_nb.nb.notifier_call) {
- struct mlx5_nb *nb = &ldev->pf[i].port_change_nb;
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->port_change_nb.nb.notifier_call) {
+ struct mlx5_nb *nb = &pf->port_change_nb;
- mlx5_eq_notifier_unregister(ldev->pf[i].dev, nb);
+ mlx5_eq_notifier_unregister(pf->dev, nb);
}
+ xa_erase(&ldev->pfs, i);
+ kfree(pf);
}
+ xa_destroy(&ldev->pfs);
mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work);
@@ -284,6 +289,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
kref_init(&ldev->ref);
mutex_init(&ldev->lock);
+ xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC);
INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
@@ -309,23 +315,54 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev)
{
+ struct lag_func *pf;
int i;
- mlx5_ldev_for_each(i, 0, ldev)
- if (ldev->pf[i].netdev == ndev)
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->netdev == ndev)
return i;
+ }
return -ENOENT;
}
+static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev)
+{
+ unsigned long idx = 0;
+ void *entry;
+
+ if (!ldev)
+ return -ENOENT;
+
+ entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
+ if (!entry)
+ return -ENOENT;
+
+ return (int)idx;
+}
+
int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
{
- int i, num = 0;
+ int master_idx, i, num = 0;
if (!ldev)
return -ENOENT;
+ master_idx = mlx5_lag_get_master_idx(ldev);
+
+ /* If seq 0 is requested and there's a primary PF, return it */
+ if (master_idx >= 0) {
+ if (seq == 0)
+ return master_idx;
+ num++;
+ }
+
mlx5_ldev_for_each(i, 0, ldev) {
+ /* Skip the primary PF in the loop */
+ if (i == master_idx)
+ continue;
+
if (num == seq)
return i;
num++;
@@ -333,6 +370,108 @@ int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
return -ENOENT;
}
+/* Reverse of mlx5_lag_get_dev_index_by_seq: given a device, return its
+ * sequence number in the LAG. Master is always 0, others numbered
+ * sequentially starting from 1.
+ */
+int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev = mlx5_lag_dev(dev);
+ int master_idx, i, num = 1;
+ struct lag_func *pf;
+
+ if (!ldev)
+ return -ENOENT;
+
+ master_idx = mlx5_lag_get_master_idx(ldev);
+ if (master_idx < 0)
+ return -ENOENT;
+
+ pf = mlx5_lag_pf(ldev, master_idx);
+ if (pf && pf->dev == dev)
+ return 0;
+
+ mlx5_ldev_for_each(i, 0, ldev) {
+ if (i == master_idx)
+ continue;
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->dev == dev)
+ return num;
+ num++;
+ }
+ return -ENOENT;
+}
+EXPORT_SYMBOL(mlx5_lag_get_dev_seq);
+
+/* Devcom events for LAG master marking */
+#define LAG_DEVCOM_PAIR (0)
+#define LAG_DEVCOM_UNPAIR (1)
+
+static void mlx5_lag_mark_master(struct mlx5_lag *ldev)
+{
+ int lowest_dev_idx = INT_MAX;
+ struct lag_func *pf;
+ int master_xa_idx = -1;
+ int dev_idx;
+ int i;
+
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ dev_idx = mlx5_get_dev_index(pf->dev);
+ if (dev_idx < lowest_dev_idx) {
+ lowest_dev_idx = dev_idx;
+ master_xa_idx = i;
+ }
+ }
+
+ if (master_xa_idx >= 0)
+ xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER);
+}
+
+static void mlx5_lag_clear_master(struct mlx5_lag *ldev)
+{
+ unsigned long idx = 0;
+ void *entry;
+
+ entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
+ if (!entry)
+ return;
+
+ xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER);
+}
+
+/* Devcom event handler to manage LAG master marking */
+static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data)
+{
+ struct mlx5_core_dev *dev = my_data;
+ struct mlx5_lag *ldev;
+ int idx;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return 0;
+
+ mutex_lock(&ldev->lock);
+ switch (event) {
+ case LAG_DEVCOM_PAIR:
+ /* No need to mark more than once */
+ idx = mlx5_lag_get_master_idx(ldev);
+ if (idx >= 0)
+ break;
+ /* Check if all LAG ports are now registered */
+ if (mlx5_lag_num_devs(ldev) == ldev->ports)
+ mlx5_lag_mark_master(ldev);
+ break;
+
+ case LAG_DEVCOM_UNPAIR:
+ /* Clear master mark when a device is removed */
+ mlx5_lag_clear_master(ldev);
+ break;
+ }
+ mutex_unlock(&ldev->lock);
+ return 0;
+}
+
int mlx5_lag_num_devs(struct mlx5_lag *ldev)
{
int i, num = 0;
@@ -349,14 +488,17 @@ int mlx5_lag_num_devs(struct mlx5_lag *ldev)
int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
{
+ struct lag_func *pf;
int i, num = 0;
if (!ldev)
return 0;
- mlx5_ldev_for_each(i, 0, ldev)
- if (ldev->pf[i].netdev)
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->netdev)
num++;
+ }
return num;
}
@@ -400,11 +542,12 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
/* Use native mapping by default where each port's buckets
* point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
+ * ports[] values are 1-indexed device indices for FW.
*/
mlx5_ldev_for_each(i, 0, ldev) {
for (j = 0; j < buckets; j++) {
idx = i * buckets + j;
- ports[idx] = i + 1;
+ ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1;
}
}
@@ -416,33 +559,42 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
/* Go over the disabled ports and for each assign a random active port */
for (i = 0; i < disabled_ports_num; i++) {
for (j = 0; j < buckets; j++) {
+ int rand_xa_idx;
+
get_random_bytes(&rand, 4);
- ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
+ rand_xa_idx = enabled[rand % enabled_ports_num];
+ ports[disabled[i] * buckets + j] =
+ mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1;
}
}
}
static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
+ struct lag_func *pf;
int i;
- mlx5_ldev_for_each(i, 0, ldev)
- if (ldev->pf[i].has_drop)
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->has_drop)
return true;
+ }
return false;
}
static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
+ struct lag_func *pf;
int i;
mlx5_ldev_for_each(i, 0, ldev) {
- if (!ldev->pf[i].has_drop)
+ pf = mlx5_lag_pf(ldev, i);
+ if (!pf->has_drop)
continue;
- mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
+ mlx5_esw_acl_ingress_vport_drop_rule_destroy(pf->dev->priv.eswitch,
MLX5_VPORT_UPLINK);
- ldev->pf[i].has_drop = false;
+ pf->has_drop = false;
}
}
@@ -451,6 +603,7 @@ static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
{
u8 disabled_ports[MLX5_MAX_PORTS] = {};
struct mlx5_core_dev *dev;
+ struct lag_func *pf;
int disabled_index;
int num_disabled;
int err;
@@ -468,11 +621,12 @@ static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
for (i = 0; i < num_disabled; i++) {
disabled_index = disabled_ports[i];
- dev = ldev->pf[disabled_index].dev;
+ pf = mlx5_lag_pf(ldev, disabled_index);
+ dev = pf->dev;
err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
MLX5_VPORT_UPLINK);
if (!err)
- ldev->pf[disabled_index].has_drop = true;
+ pf->has_drop = true;
else
mlx5_core_err(dev,
"Failed to create lag drop rule, error: %d", err);
@@ -504,7 +658,7 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
if (idx < 0)
return -EINVAL;
- dev0 = ldev->pf[idx].dev;
+ dev0 = mlx5_lag_pf(ldev, idx)->dev;
if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
ret = mlx5_lag_port_sel_modify(ldev, ports);
if (ret ||
@@ -521,6 +675,7 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
{
struct net_device *ndev = NULL;
+ struct lag_func *pf;
struct mlx5_lag *ldev;
unsigned long flags;
int i, last_idx;
@@ -531,14 +686,17 @@ static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev
if (!ldev)
goto unlock;
- mlx5_ldev_for_each(i, 0, ldev)
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
if (ldev->tracker.netdev_state[i].tx_enabled)
- ndev = ldev->pf[i].netdev;
+ ndev = pf->netdev;
+ }
if (!ndev) {
last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
if (last_idx < 0)
goto unlock;
- ndev = ldev->pf[last_idx].netdev;
+ pf = mlx5_lag_pf(ldev, last_idx);
+ ndev = pf->netdev;
}
dev_hold(ndev);
@@ -563,7 +721,7 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
if (first_idx < 0)
return;
- dev0 = ldev->pf[first_idx].dev;
+ dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
mlx5_ldev_for_each(i, 0, ldev) {
@@ -615,7 +773,7 @@ static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
mode == MLX5_LAG_MODE_MULTIPATH)
return 0;
- dev0 = ldev->pf[first_idx].dev;
+ dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
if (ldev->ports > 2)
@@ -661,19 +819,24 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
{
- int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+ int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
struct mlx5_eswitch *master_esw;
struct mlx5_core_dev *dev0;
int i, j;
int err;
- if (first_idx < 0)
+ if (master_idx < 0)
return -EINVAL;
- dev0 = ldev->pf[first_idx].dev;
+ dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
master_esw = dev0->priv.eswitch;
- mlx5_ldev_for_each(i, first_idx + 1, ldev) {
- struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;
+ mlx5_ldev_for_each(i, 0, ldev) {
+ struct mlx5_eswitch *slave_esw;
+
+ if (i == master_idx)
+ continue;
+
+ slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch;
err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
slave_esw, ldev->ports);
@@ -682,9 +845,12 @@ static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
}
return 0;
err:
- mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev)
+ mlx5_ldev_for_each_reverse(j, i, 0, ldev) {
+ if (j == master_idx)
+ continue;
mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
- ldev->pf[j].dev->priv.eswitch);
+ mlx5_lag_pf(ldev, j)->dev->priv.eswitch);
+ }
return err;
}
@@ -693,8 +859,8 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
enum mlx5_lag_mode mode,
unsigned long flags)
{
- bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+ bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
struct mlx5_core_dev *dev0;
int err;
@@ -702,7 +868,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
if (first_idx < 0)
return -EINVAL;
- dev0 = ldev->pf[first_idx].dev;
+ dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
if (tracker)
mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
@@ -740,16 +906,17 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
enum mlx5_lag_mode mode,
bool shared_fdb)
{
- int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
struct mlx5_core_dev *dev0;
unsigned long flags = 0;
+ int master_idx;
int err;
- if (first_idx < 0)
+ master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+ if (master_idx < 0)
return -EINVAL;
- dev0 = ldev->pf[first_idx].dev;
+ dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
if (err)
return err;
@@ -793,7 +960,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
- int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+ int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
bool roce_lag = __mlx5_lag_is_roce(ldev);
unsigned long flags = ldev->mode_flags;
@@ -802,19 +969,22 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev)
int err;
int i;
- if (first_idx < 0)
+ if (master_idx < 0)
return -EINVAL;
- dev0 = ldev->pf[first_idx].dev;
+ dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
master_esw = dev0->priv.eswitch;
ldev->mode = MLX5_LAG_MODE_NONE;
ldev->mode_flags = 0;
mlx5_lag_mp_reset(ldev);
if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
- mlx5_ldev_for_each(i, first_idx + 1, ldev)
+ mlx5_ldev_for_each(i, 0, ldev) {
+ if (i == master_idx)
+ continue;
mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
- ldev->pf[i].dev->priv.eswitch);
+ mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
+ }
clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
}
@@ -844,68 +1014,82 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev)
bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
- int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+ int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
#ifdef CONFIG_MLX5_ESWITCH
struct mlx5_core_dev *dev;
u8 mode;
#endif
+ struct lag_func *pf;
bool roce_support;
int i;
- if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
+ if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
return false;
#ifdef CONFIG_MLX5_ESWITCH
mlx5_ldev_for_each(i, 0, ldev) {
- dev = ldev->pf[i].dev;
+ pf = mlx5_lag_pf(ldev, i);
+ dev = pf->dev;
if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
return false;
}
- dev = ldev->pf[first_idx].dev;
+ pf = mlx5_lag_pf(ldev, master_idx);
+ dev = pf->dev;
mode = mlx5_eswitch_mode(dev);
- mlx5_ldev_for_each(i, 0, ldev)
- if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ if (mlx5_eswitch_mode(pf->dev) != mode)
return false;
+ }
#else
- mlx5_ldev_for_each(i, 0, ldev)
- if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ if (mlx5_sriov_is_enabled(pf->dev))
return false;
+ }
#endif
- roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev);
- mlx5_ldev_for_each(i, first_idx + 1, ldev)
- if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
+ pf = mlx5_lag_pf(ldev, master_idx);
+ roce_support = mlx5_get_roce_state(pf->dev);
+ mlx5_ldev_for_each(i, 0, ldev) {
+ if (i == master_idx)
+ continue;
+ pf = mlx5_lag_pf(ldev, i);
+ if (mlx5_get_roce_state(pf->dev) != roce_support)
return false;
+ }
return true;
}
void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
+ struct lag_func *pf;
int i;
mlx5_ldev_for_each(i, 0, ldev) {
- if (ldev->pf[i].dev->priv.flags &
- MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
continue;
- ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
- mlx5_rescan_drivers_locked(ldev->pf[i].dev);
+ pf->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(pf->dev);
}
}
void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
+ struct lag_func *pf;
int i;
mlx5_ldev_for_each(i, 0, ldev) {
- if (ldev->pf[i].dev->priv.flags &
- MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
continue;
- ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
- mlx5_rescan_drivers_locked(ldev->pf[i].dev);
+ pf->dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(pf->dev);
}
}
@@ -921,7 +1105,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
if (idx < 0)
return;
- dev0 = ldev->pf[idx].dev;
+ dev0 = mlx5_lag_pf(ldev, idx)->dev;
roce_lag = __mlx5_lag_is_roce(ldev);
if (shared_fdb) {
@@ -931,8 +1115,11 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
}
- mlx5_ldev_for_each(i, idx + 1, ldev)
- mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
+ mlx5_ldev_for_each(i, 0, ldev) {
+ if (i == idx)
+ continue;
+ mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev);
+ }
}
err = mlx5_deactivate_lag(ldev);
@@ -944,21 +1131,25 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
if (shared_fdb)
mlx5_ldev_for_each(i, 0, ldev)
- if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
- mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
+ if (!(mlx5_lag_pf(ldev, i)->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+ mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
}
bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
{
- int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
struct mlx5_core_dev *dev;
+ bool ret = false;
+ int idx;
int i;
+ idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
if (idx < 0)
return false;
- mlx5_ldev_for_each(i, idx + 1, ldev) {
- dev = ldev->pf[i].dev;
+ mlx5_ldev_for_each(i, 0, ldev) {
+ if (i == idx)
+ continue;
+ dev = mlx5_lag_pf(ldev, i)->dev;
if (is_mdev_switchdev_mode(dev) &&
mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
@@ -969,28 +1160,33 @@ bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
return false;
}
- dev = ldev->pf[idx].dev;
+ dev = mlx5_lag_pf(ldev, idx)->dev;
if (is_mdev_switchdev_mode(dev) &&
mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
- return true;
+ ret = true;
- return false;
+ return ret;
}
static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
{
bool roce_lag = true;
+ struct lag_func *pf;
int i;
- mlx5_ldev_for_each(i, 0, ldev)
- roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ roce_lag = roce_lag && !mlx5_sriov_is_enabled(pf->dev);
+ }
#ifdef CONFIG_MLX5_ESWITCH
- mlx5_ldev_for_each(i, 0, ldev)
- roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ roce_lag = roce_lag && is_mdev_legacy_mode(pf->dev);
+ }
#endif
return roce_lag;
@@ -1014,13 +1210,17 @@ mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
int (*get_speed)(struct mlx5_core_dev *, u32 *))
{
struct mlx5_core_dev *pf_mdev;
+ struct lag_func *pf;
int pf_idx;
u32 speed;
int ret;
*sum_speed = 0;
mlx5_ldev_for_each(pf_idx, 0, ldev) {
- pf_mdev = ldev->pf[pf_idx].dev;
+ pf = mlx5_lag_pf(ldev, pf_idx);
+ if (!pf)
+ continue;
+ pf_mdev = pf->dev;
if (!pf_mdev)
continue;
@@ -1086,6 +1286,7 @@ static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *mdev;
+ struct lag_func *pf;
u32 speed;
int pf_idx;
@@ -1105,7 +1306,10 @@ void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
speed = speed / MLX5_MAX_TX_SPEED_UNIT;
mlx5_ldev_for_each(pf_idx, 0, ldev) {
- mdev = ldev->pf[pf_idx].dev;
+ pf = mlx5_lag_pf(ldev, pf_idx);
+ if (!pf)
+ continue;
+ mdev = pf->dev;
if (!mdev)
continue;
@@ -1116,12 +1320,16 @@ void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *mdev;
+ struct lag_func *pf;
u32 speed;
int pf_idx;
int ret;
mlx5_ldev_for_each(pf_idx, 0, ldev) {
- mdev = ldev->pf[pf_idx].dev;
+ pf = mlx5_lag_pf(ldev, pf_idx);
+ if (!pf)
+ continue;
+ mdev = pf->dev;
if (!mdev)
continue;
@@ -1152,7 +1360,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
if (idx < 0)
return;
- dev0 = ldev->pf[idx].dev;
+ dev0 = mlx5_lag_pf(ldev, idx)->dev;
if (!mlx5_lag_is_ready(ldev)) {
do_bond = false;
} else {
@@ -1182,16 +1390,23 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
mlx5_lag_add_devices(ldev);
if (shared_fdb) {
mlx5_ldev_for_each(i, 0, ldev)
- mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
+ mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
}
return;
- } else if (roce_lag) {
+ }
+
+ if (roce_lag) {
+ struct mlx5_core_dev *dev;
+
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
- mlx5_ldev_for_each(i, idx + 1, ldev) {
- if (mlx5_get_roce_state(ldev->pf[i].dev))
- mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
+ mlx5_ldev_for_each(i, 0, ldev) {
+ if (i == idx)
+ continue;
+ dev = mlx5_lag_pf(ldev, i)->dev;
+ if (mlx5_get_roce_state(dev))
+ mlx5_nic_vport_enable_roce(dev);
}
} else if (shared_fdb) {
int i;
@@ -1200,7 +1415,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
mlx5_rescan_drivers_locked(dev0);
mlx5_ldev_for_each(i, 0, ldev) {
- err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
+ err = mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
if (err)
break;
}
@@ -1211,7 +1426,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
mlx5_deactivate_lag(ldev);
mlx5_lag_add_devices(ldev);
mlx5_ldev_for_each(i, 0, ldev)
- mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
+ mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
mlx5_core_err(dev0, "Failed to enable lag\n");
return;
}
@@ -1243,16 +1458,171 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
{
struct mlx5_devcom_comp_dev *devcom = NULL;
+ struct lag_func *pf;
int i;
mutex_lock(&ldev->lock);
i = mlx5_get_next_ldev_func(ldev, 0);
- if (i < MLX5_MAX_PORTS)
- devcom = ldev->pf[i].dev->priv.hca_devcom_comp;
+ if (i < MLX5_MAX_PORTS) {
+ pf = mlx5_lag_pf(ldev, i);
+ devcom = pf->dev->priv.hca_devcom_comp;
+ }
mutex_unlock(&ldev->lock);
return devcom;
}
+static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table_attr *ft_attr,
+ struct mlx5_lag *ldev)
+{
+#ifdef CONFIG_MLX5_ESWITCH
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_group *fg;
+ int err;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
+ if (!ns)
+ return 0;
+
+ ldev->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr);
+ if (IS_ERR(ldev->lag_demux_ft))
+ return PTR_ERR(ldev->lag_demux_ft);
+
+ fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch,
+ ldev->lag_demux_ft);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ mlx5_destroy_flow_table(ldev->lag_demux_ft);
+ ldev->lag_demux_ft = NULL;
+ return err;
+ }
+
+ ldev->lag_demux_fg = fg;
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table_attr *ft_attr,
+ struct mlx5_lag *ldev)
+{
+ struct mlx5_flow_namespace *ns;
+ int err;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
+ if (!ns)
+ return 0;
+
+ ldev->lag_demux_fg = NULL;
+ ft_attr->max_fte = 1;
+ ldev->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr);
+ if (IS_ERR(ldev->lag_demux_ft)) {
+ err = PTR_ERR(ldev->lag_demux_ft);
+ ldev->lag_demux_ft = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table_attr *ft_attr)
+{
+ struct mlx5_lag *ldev;
+
+ if (!ft_attr)
+ return -EINVAL;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return -ENODEV;
+
+ xa_init(&ldev->lag_demux_rules);
+
+ if (mlx5_get_sd(dev))
+ return mlx5_lag_demux_ft_fg_init(dev, ft_attr, ldev);
+
+ return mlx5_lag_demux_fw_init(dev, ft_attr, ldev);
+}
+EXPORT_SYMBOL(mlx5_lag_demux_init);
+
+void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_handle *rule;
+ struct mlx5_lag *ldev;
+ unsigned long vport_num;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ return;
+
+ xa_for_each(&ldev->lag_demux_rules, vport_num, rule)
+ mlx5_del_flow_rules(rule);
+ xa_destroy(&ldev->lag_demux_rules);
+
+ if (ldev->lag_demux_fg)
+ mlx5_destroy_flow_group(ldev->lag_demux_fg);
+ if (ldev->lag_demux_ft)
+ mlx5_destroy_flow_table(ldev->lag_demux_ft);
+ ldev->lag_demux_fg = NULL;
+ ldev->lag_demux_ft = NULL;
+}
+EXPORT_SYMBOL(mlx5_lag_demux_cleanup);
+
+int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num,
+ int index)
+{
+ struct mlx5_flow_handle *rule;
+ struct mlx5_lag *ldev;
+ int err;
+
+ ldev = mlx5_lag_dev(vport_dev);
+ if (!ldev || !ldev->lag_demux_fg)
+ return 0;
+
+ if (xa_load(&ldev->lag_demux_rules, index))
+ return 0;
+
+ rule = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch,
+ vport_num, ldev->lag_demux_ft);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(vport_dev,
+ "Failed to create LAG demux rule for vport %u, err %d\n",
+ vport_num, err);
+ return err;
+ }
+
+ err = xa_err(xa_store(&ldev->lag_demux_rules, index, rule,
+ GFP_KERNEL));
+ if (err) {
+ mlx5_del_flow_rules(rule);
+ mlx5_core_warn(vport_dev,
+ "Failed to store LAG demux rule for vport %u, err %d\n",
+ vport_num, err);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_lag_demux_rule_add);
+
+void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index)
+{
+ struct mlx5_flow_handle *rule;
+ struct mlx5_lag *ldev;
+
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev || !ldev->lag_demux_fg)
+ return;
+
+ rule = xa_erase(&ldev->lag_demux_rules, index);
+ if (rule)
+ mlx5_del_flow_rules(rule);
+}
+EXPORT_SYMBOL(mlx5_lag_demux_rule_del);
+
static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
@@ -1297,6 +1667,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
struct netdev_lag_upper_info *lag_upper_info = NULL;
bool is_bonded, is_in_lag, mode_supported;
bool has_inactive = 0;
+ struct lag_func *pf;
struct slave *slave;
u8 bond_status = 0;
int num_slaves = 0;
@@ -1317,7 +1688,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
rcu_read_lock();
for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
mlx5_ldev_for_each(i, 0, ldev) {
- if (ldev->pf[i].netdev == ndev_tmp) {
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->netdev == ndev_tmp) {
idx++;
break;
}
@@ -1537,59 +1909,92 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
struct mlx5_core_dev *dev,
struct net_device *netdev)
{
- unsigned int fn = mlx5_get_dev_index(dev);
+ struct lag_func *pf;
unsigned long flags;
+ int i;
spin_lock_irqsave(&lag_lock, flags);
- ldev->pf[fn].netdev = netdev;
- ldev->tracker.netdev_state[fn].link_up = 0;
- ldev->tracker.netdev_state[fn].tx_enabled = 0;
+ /* Find pf entry by matching dev pointer */
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->dev == dev) {
+ pf->netdev = netdev;
+ ldev->tracker.netdev_state[i].link_up = 0;
+ ldev->tracker.netdev_state[i].tx_enabled = 0;
+ break;
+ }
+ }
spin_unlock_irqrestore(&lag_lock, flags);
}
static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
struct net_device *netdev)
{
+ struct lag_func *pf;
unsigned long flags;
int i;
spin_lock_irqsave(&lag_lock, flags);
mlx5_ldev_for_each(i, 0, ldev) {
- if (ldev->pf[i].netdev == netdev) {
- ldev->pf[i].netdev = NULL;
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->netdev == netdev) {
+ pf->netdev = NULL;
break;
}
}
spin_unlock_irqrestore(&lag_lock, flags);
}
-static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
+static int mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
struct mlx5_core_dev *dev)
{
- unsigned int fn = mlx5_get_dev_index(dev);
+ struct lag_func *pf;
+ u32 idx;
+ int err;
+
+ pf = kzalloc_obj(*pf);
+ if (!pf)
+ return -ENOMEM;
+
+ err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1),
+ GFP_KERNEL);
+ if (err) {
+ kfree(pf);
+ return err;
+ }
- ldev->pf[fn].dev = dev;
+ pf->idx = idx;
+ pf->dev = dev;
dev->priv.lag = ldev;
- MLX5_NB_INIT(&ldev->pf[fn].port_change_nb,
+ MLX5_NB_INIT(&pf->port_change_nb,
mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
- mlx5_eq_notifier_register(dev, &ldev->pf[fn].port_change_nb);
+ mlx5_eq_notifier_register(dev, &pf->port_change_nb);
+
+ return 0;
}
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
struct mlx5_core_dev *dev)
{
- int fn;
+ struct lag_func *pf;
+ int i;
- fn = mlx5_get_dev_index(dev);
- if (ldev->pf[fn].dev != dev)
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->dev == dev)
+ break;
+ }
+ if (i >= MLX5_MAX_PORTS)
return;
- if (ldev->pf[fn].port_change_nb.nb.notifier_call)
- mlx5_eq_notifier_unregister(dev, &ldev->pf[fn].port_change_nb);
+ if (pf->port_change_nb.nb.notifier_call)
+ mlx5_eq_notifier_unregister(dev, &pf->port_change_nb);
- ldev->pf[fn].dev = NULL;
+ pf->dev = NULL;
dev->priv.lag = NULL;
+ xa_erase(&ldev->pfs, pf->idx);
+ kfree(pf);
}
/* Must be called with HCA devcom component lock held */
@@ -1598,6 +2003,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
struct mlx5_devcom_comp_dev *pos = NULL;
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
+ int err;
tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
if (tmp_dev)
@@ -1609,7 +2015,12 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
mlx5_core_err(dev, "Failed to alloc lag dev\n");
return 0;
}
- mlx5_ldev_add_mdev(ldev, dev);
+ err = mlx5_ldev_add_mdev(ldev, dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to add mdev to lag dev\n");
+ mlx5_ldev_put(ldev);
+ return 0;
+ }
return 0;
}
@@ -1619,7 +2030,12 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
return -EAGAIN;
}
mlx5_ldev_get(ldev);
- mlx5_ldev_add_mdev(ldev, dev);
+ err = mlx5_ldev_add_mdev(ldev, dev);
+ if (err) {
+ mlx5_ldev_put(ldev);
+ mutex_unlock(&ldev->lock);
+ return err;
+ }
mutex_unlock(&ldev->lock);
return 0;
@@ -1647,7 +2063,8 @@ static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
dev->priv.hca_devcom_comp =
mlx5_devcom_register_component(dev->priv.devc,
MLX5_DEVCOM_HCA_PORTS,
- &attr, NULL, dev);
+ &attr, mlx5_lag_devcom_event,
+ dev);
if (!dev->priv.hca_devcom_comp) {
mlx5_core_err(dev,
"Failed to register devcom HCA component.");
@@ -1678,6 +2095,9 @@ recheck:
}
mlx5_ldev_remove_mdev(ldev, dev);
mutex_unlock(&ldev->lock);
+ /* Send devcom event to notify peers that a device is being removed */
+ mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
+ LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev);
mlx5_lag_unregister_hca_devcom_comp(dev);
mlx5_ldev_put(ldev);
}
@@ -1701,6 +2121,9 @@ recheck:
msleep(100);
goto recheck;
}
+ /* Send devcom event to notify peers that a device was added */
+ mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
+ LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev);
mlx5_ldev_add_debugfs(dev);
}
@@ -1746,21 +2169,25 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
{
+ struct lag_func *pf;
int i;
- for (i = start_idx; i >= end_idx; i--)
- if (ldev->pf[i].dev)
+ for (i = start_idx; i >= end_idx; i--) {
+ pf = xa_load(&ldev->pfs, i);
+ if (pf && pf->dev)
return i;
+ }
return -1;
}
int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
{
- int i;
+ struct lag_func *pf;
+ unsigned long idx;
- for (i = start_idx; i < MLX5_MAX_PORTS; i++)
- if (ldev->pf[i].dev)
- return i;
+ xa_for_each_start(&ldev->pfs, idx, pf, start_idx)
+ if (pf->dev)
+ return idx;
return MLX5_MAX_PORTS;
}
@@ -1814,13 +2241,17 @@ bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
unsigned long flags;
+ struct lag_func *pf;
bool res = false;
int idx;
spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
- res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev;
+ if (ldev && __mlx5_lag_is_active(ldev) && idx >= 0) {
+ pf = mlx5_lag_pf(ldev, idx);
+ res = pf && dev == pf->dev;
+ }
spin_unlock_irqrestore(&lag_lock, flags);
return res;
@@ -1899,6 +2330,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
{
struct mlx5_lag *ldev;
unsigned long flags;
+ struct lag_func *pf;
u8 port = 0;
int i;
@@ -1908,7 +2340,8 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
goto unlock;
mlx5_ldev_for_each(i, 0, ldev) {
- if (ldev->pf[i].netdev == slave) {
+ pf = mlx5_lag_pf(ldev, i);
+ if (pf->netdev == slave) {
port = i;
break;
}
@@ -1939,6 +2372,7 @@ struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int
struct mlx5_core_dev *peer_dev = NULL;
struct mlx5_lag *ldev;
unsigned long flags;
+ struct lag_func *pf;
int idx;
spin_lock_irqsave(&lag_lock, flags);
@@ -1948,9 +2382,11 @@ struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int
if (*i == MLX5_MAX_PORTS)
goto unlock;
- mlx5_ldev_for_each(idx, *i, ldev)
- if (ldev->pf[idx].dev != dev)
+ mlx5_ldev_for_each(idx, *i, ldev) {
+ pf = mlx5_lag_pf(ldev, idx);
+ if (pf->dev != dev)
break;
+ }
if (idx == MLX5_MAX_PORTS) {
*i = idx;
@@ -1958,7 +2394,8 @@ struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int
}
*i = idx + 1;
- peer_dev = ldev->pf[idx].dev;
+ pf = mlx5_lag_pf(ldev, idx);
+ peer_dev = pf->dev;
unlock:
spin_unlock_irqrestore(&lag_lock, flags);
@@ -1976,6 +2413,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
int ret = 0, i, j, idx = 0;
struct mlx5_lag *ldev;
unsigned long flags;
+ struct lag_func *pf;
int num_ports;
void *out;
@@ -1995,8 +2433,10 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
ldev = mlx5_lag_dev(dev);
if (ldev && __mlx5_lag_is_active(ldev)) {
num_ports = ldev->ports;
- mlx5_ldev_for_each(i, 0, ldev)
- mdev[idx++] = ldev->pf[i].dev;
+ mlx5_ldev_for_each(i, 0, ldev) {
+ pf = mlx5_lag_pf(ldev, i);
+ mdev[idx++] = pf->dev;
+ }
} else {
num_ports = 1;
mdev[MLX5_LAG_P1] = dev;
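For RDMA-side consumers, the demux API exported above composes into a simple lifecycle: init once, add/del rules per vport, cleanup on teardown. A hedged usage sketch; the table sizing and the choice of vport_num as the xarray index are assumptions, not mandated by the API:

	/* Sketch of the expected call order for the exported LAG demux API. */
	static int example_demux_setup(struct mlx5_core_dev *dev, u16 vport_num)
	{
		struct mlx5_flow_table_attr ft_attr = {
			.max_fte = MLX5_MAX_PORTS,	/* assumption; FW mode forces 1 */
		};
		int err;

		err = mlx5_lag_demux_init(dev, &ft_attr);
		if (err)
			return err;

		err = mlx5_lag_demux_rule_add(dev, vport_num, vport_num);
		if (err)
			mlx5_lag_demux_cleanup(dev);

		return err;
	}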
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index be1afece5fdc..6c911374f409 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -5,8 +5,17 @@
#define __MLX5_LAG_H__
#include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/xarray.h>
+#include <linux/mlx5/fs.h>
#define MLX5_LAG_MAX_HASH_BUCKETS 16
+/* XArray mark for the LAG master device
+ * (device with lowest mlx5_get_dev_index).
+ * Note: XA_MARK_0 is reserved by XA_FLAGS_ALLOC for free-slot tracking.
+ */
+#define MLX5_LAG_XA_MARK_MASTER XA_MARK_1
+
#include "mlx5_core.h"
#include "mp.h"
#include "port_sel.h"
@@ -39,6 +48,7 @@ struct lag_func {
struct mlx5_core_dev *dev;
struct net_device *netdev;
bool has_drop;
+ unsigned int idx; /* xarray index assigned by LAG */
struct mlx5_nb port_change_nb;
};
@@ -64,7 +74,7 @@ struct mlx5_lag {
int mode_changes_in_progress;
u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
struct kref ref;
- struct lag_func pf[MLX5_MAX_PORTS];
+ struct xarray pfs;
struct lag_tracker tracker;
struct workqueue_struct *wq;
struct delayed_work bond_work;
@@ -76,6 +86,9 @@ struct mlx5_lag {
/* Protect lag fields/state changes */
struct mutex lock;
struct lag_mpesw lag_mpesw;
+ struct mlx5_flow_table *lag_demux_ft;
+ struct mlx5_flow_group *lag_demux_fg;
+ struct xarray lag_demux_rules;
};
static inline struct mlx5_lag *
@@ -84,6 +97,34 @@ mlx5_lag_dev(struct mlx5_core_dev *dev)
return dev->priv.lag;
}
+static inline struct lag_func *
+mlx5_lag_pf(struct mlx5_lag *ldev, unsigned int idx)
+{
+ return xa_load(&ldev->pfs, idx);
+}
+
+/* Get device index (mlx5_get_dev_index) from xarray index */
+static inline int mlx5_lag_xa_to_dev_idx(struct mlx5_lag *ldev, int xa_idx)
+{
+ struct lag_func *pf = mlx5_lag_pf(ldev, xa_idx);
+
+ return pf ? mlx5_get_dev_index(pf->dev) : -ENOENT;
+}
+
+/* Find lag_func by device index (reverse lookup from mlx5_get_dev_index) */
+static inline struct lag_func *
+mlx5_lag_pf_by_dev_idx(struct mlx5_lag *ldev, int dev_idx)
+{
+ struct lag_func *pf;
+ unsigned long idx;
+
+ xa_for_each(&ldev->pfs, idx, pf) {
+ if (mlx5_get_dev_index(pf->dev) == dev_idx)
+ return pf;
+ }
+ return NULL;
+}
+
static inline bool
__mlx5_lag_is_active(struct mlx5_lag *ldev)
{
@@ -98,6 +139,12 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev)
bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev);
bool mlx5_lag_check_prereq(struct mlx5_lag *ldev);
+int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table_attr *ft_attr);
+void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev);
+int mlx5_lag_demux_rule_add(struct mlx5_core_dev *dev, u16 vport_num,
+ int vport_index);
+void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int vport_index);
void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker);
int mlx5_activate_lag(struct mlx5_lag *ldev,
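One note on the xarray plumbing above: MLX5_LAG_XA_MARK_MASTER is XA_MARK_1 because xa_init_flags(..., XA_FLAGS_ALLOC) reserves XA_MARK_0 for free-slot tracking. The master lookup then reduces to a marked find, mirroring mlx5_lag_get_master_idx() in lag.c (sketch; ldev locking is assumed):

	/* Sketch: locate the master lag_func via the xarray mark. */
	static struct lag_func *example_find_master(struct mlx5_lag *ldev)
	{
		unsigned long idx = 0;

		return xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
	}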
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
index c4c2bf33ef35..f42e051fa7e7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -29,8 +29,8 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS)
return false;
- return mlx5_esw_multipath_prereq(ldev->pf[idx0].dev,
- ldev->pf[idx1].dev);
+ return mlx5_esw_multipath_prereq(mlx5_lag_pf(ldev, idx0)->dev,
+ mlx5_lag_pf(ldev, idx1)->dev);
}
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
@@ -80,18 +80,18 @@ static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
tracker.netdev_state[idx1].link_up = true;
break;
default:
- mlx5_core_warn(ldev->pf[idx0].dev,
+ mlx5_core_warn(mlx5_lag_pf(ldev, idx0)->dev,
"Invalid affinity port %d", port);
return;
}
if (tracker.netdev_state[idx0].tx_enabled)
- mlx5_notifier_call_chain(ldev->pf[idx0].dev->priv.events,
+ mlx5_notifier_call_chain(mlx5_lag_pf(ldev, idx0)->dev->priv.events,
MLX5_DEV_EVENT_PORT_AFFINITY,
(void *)0);
if (tracker.netdev_state[idx1].tx_enabled)
- mlx5_notifier_call_chain(ldev->pf[idx1].dev->priv.events,
+ mlx5_notifier_call_chain(mlx5_lag_pf(ldev, idx1)->dev->priv.events,
MLX5_DEV_EVENT_PORT_AFFINITY,
(void *)0);
@@ -146,7 +146,7 @@ mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev,
fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev);
if (ldev_idx >= 0)
- return ldev->pf[ldev_idx].netdev;
+ return mlx5_lag_pf(ldev, ldev_idx)->netdev;
}
return NULL;
@@ -178,7 +178,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
mp->fib.dst_len <= fen_info->dst_len &&
!(mp->fib.dst_len == fen_info->dst_len &&
fi->fib_priority < mp->fib.priority)) {
- mlx5_core_dbg(ldev->pf[idx].dev,
+ mlx5_core_dbg(mlx5_lag_pf(ldev, idx)->dev,
"Multipath entry with lower priority was rejected\n");
return;
}
@@ -194,7 +194,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
}
if (nh_dev0 == nh_dev1) {
- mlx5_core_warn(ldev->pf[idx].dev,
+ mlx5_core_warn(mlx5_lag_pf(ldev, idx)->dev,
"Multipath offload doesn't support routes with multiple nexthops of the same device");
return;
}
@@ -203,7 +203,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
if (__mlx5_lag_is_active(ldev)) {
mlx5_ldev_for_each(i, 0, ldev) {
dev_idx++;
- if (ldev->pf[i].netdev == nh_dev0)
+ if (mlx5_lag_pf(ldev, i)->netdev == nh_dev0)
break;
}
mlx5_lag_set_port_affinity(ldev, dev_idx);
@@ -240,7 +240,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
/* nh added/removed */
if (event == FIB_EVENT_NH_DEL) {
mlx5_ldev_for_each(i, 0, ldev) {
- if (ldev->pf[i].netdev == fib_nh->fib_nh_dev)
+ if (mlx5_lag_pf(ldev, i)->netdev == fib_nh->fib_nh_dev)
break;
dev_idx++;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index 74d5c2ed14ff..5eea12a6887a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -16,7 +16,7 @@ static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev)
int i;
mlx5_ldev_for_each(i, 0, ldev) {
- dev = ldev->pf[i].dev;
+ dev = mlx5_lag_pf(ldev, i)->dev;
esw = dev->priv.eswitch;
pf_metadata = ldev->lag_mpesw.pf_metadata[i];
if (!pf_metadata)
@@ -37,7 +37,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev)
int i, err;
mlx5_ldev_for_each(i, 0, ldev) {
- dev = ldev->pf[i].dev;
+ dev = mlx5_lag_pf(ldev, i)->dev;
esw = dev->priv.eswitch;
pf_metadata = mlx5_esw_match_metadata_alloc(esw);
if (!pf_metadata) {
@@ -53,7 +53,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev)
}
mlx5_ldev_for_each(i, 0, ldev) {
- dev = ldev->pf[i].dev;
+ dev = mlx5_lag_pf(ldev, i)->dev;
mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW,
(void *)0);
}
@@ -67,9 +67,9 @@ err_metadata:
static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
{
+ int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
struct mlx5_core_dev *dev0;
int err;
- int idx;
int i;
if (ldev->mode == MLX5_LAG_MODE_MPESW)
@@ -78,11 +78,10 @@ static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
if (ldev->mode != MLX5_LAG_MODE_NONE)
return -EINVAL;
- idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
if (idx < 0)
return -EINVAL;
- dev0 = ldev->pf[idx].dev;
+ dev0 = mlx5_lag_pf(ldev, idx)->dev;
if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS ||
!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) ||
!MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) ||
@@ -105,7 +104,7 @@ static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
mlx5_ldev_for_each(i, 0, ldev) {
- err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
+ err = mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
if (err)
goto err_rescan_drivers;
}
@@ -121,7 +120,7 @@ err_rescan_drivers:
err_add_devices:
mlx5_lag_add_devices(ldev);
mlx5_ldev_for_each(i, 0, ldev)
- mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
+ mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
mlx5_mpesw_metadata_cleanup(ldev);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index 16c7d16215c4..2a034b2a3eee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -50,7 +50,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
if (first_idx < 0)
return -EINVAL;
- dev = ldev->pf[first_idx].dev;
+ dev = mlx5_lag_pf(ldev, first_idx)->dev;
ft_attr.max_fte = ldev->ports * ldev->buckets;
ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER;
@@ -84,8 +84,12 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
idx = i * ldev->buckets + j;
affinity = ports[idx];
- dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev,
- vhca_id);
+ /* affinity is 1-indexed device index,
+ * use reverse lookup.
+ */
+ dest.vport.vhca_id =
+ MLX5_CAP_GEN(mlx5_lag_pf_by_dev_idx(ldev, affinity - 1)->dev,
+ vhca_id);
lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft,
NULL, &flow_act,
&dest, 1);
@@ -307,7 +311,7 @@ mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash,
if (first_idx < 0)
return ERR_PTR(-EINVAL);
- dev = ldev->pf[first_idx].dev;
+ dev = mlx5_lag_pf(ldev, first_idx)->dev;
lag_definer = kzalloc_obj(*lag_definer);
if (!lag_definer)
return ERR_PTR(-ENOMEM);
@@ -356,8 +360,8 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev,
if (first_idx < 0)
return;
- dev = ldev->pf[first_idx].dev;
- mlx5_ldev_for_each(i, first_idx, ldev) {
+ dev = mlx5_lag_pf(ldev, first_idx)->dev;
+ mlx5_ldev_for_each(i, 0, ldev) {
for (j = 0; j < ldev->buckets; j++) {
idx = i * ldev->buckets + j;
mlx5_del_flow_rules(lag_definer->rules[idx]);
@@ -520,7 +524,7 @@ static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev)
if (first_idx < 0)
return -EINVAL;
- dev = ldev->pf[first_idx].dev;
+ dev = mlx5_lag_pf(ldev, first_idx)->dev;
mlx5_lag_set_outer_ttc_params(ldev, &ttc_params);
port_sel->outer.ttc = mlx5_create_ttc_table(dev, &ttc_params);
return PTR_ERR_OR_ZERO(port_sel->outer.ttc);
@@ -536,7 +540,7 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev)
if (first_idx < 0)
return -EINVAL;
- dev = ldev->pf[first_idx].dev;
+ dev = mlx5_lag_pf(ldev, first_idx)->dev;
mlx5_lag_set_inner_ttc_params(ldev, &ttc_params);
port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params);
return PTR_ERR_OR_ZERO(port_sel->inner.ttc);
@@ -594,8 +598,12 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
if (ldev->v2p_map[idx] == ports[idx])
continue;
- dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
- vhca_id);
+ /* ports[] contains 1-indexed device indices,
+ * use reverse lookup.
+ */
+ dest.vport.vhca_id =
+ MLX5_CAP_GEN(mlx5_lag_pf_by_dev_idx(ldev, ports[idx] - 1)->dev,
+ vhca_id);
err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index 954942ad93c5..762c783156b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -107,7 +107,7 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses)
/* Disconnect secondaries from the network */
if (!MLX5_CAP_GEN(dev, eswitch_manager))
return false;
- if (!MLX5_CAP_GEN(dev, silent_mode))
+ if (!MLX5_CAP_GEN(dev, silent_mode_set))
return false;
/* RX steering from primary to secondaries */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 1c35c3fc3bb3..dc7f20a357d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1779,6 +1779,7 @@ static const int types[] = {
MLX5_CAP_CRYPTO,
MLX5_CAP_SHAMPO,
MLX5_CAP_ADV_RDMA,
+ MLX5_CAP_TLP_EMULATION,
};
static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)