| author | Jakub Kicinski <kuba@kernel.org> | 2026-03-18 19:08:49 -0700 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-03-18 19:08:50 -0700 |
| commit | 76eea68d5fe5c6474b4f2f63f785fd9f12789f5c (patch) | |
| tree | 934483890bdee8e0d8f8d5bc633fa987dfdf61a1 /drivers/net | |
| parent | d5516452a362aab2c136ab815967c4417c92d228 (diff) | |
| parent | 4dd2115f43594da5271a1aa34fde6719b4259047 (diff) | |
Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
Tariq Toukan says:
====================
mlx5-next updates 2026-03-17
The following pull request contains common mlx5 updates:
* 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux:
net/mlx5: Expose MLX5_UMR_ALIGN definition
{net/RDMA}/mlx5: Add LAG demux table API and vport demux rules
net/mlx5: Add VHCA RX flow destination support for FW steering
net/mlx5: LAG, replace mlx5_get_dev_index with LAG sequence number
net/mlx5: E-switch, modify peer miss rule index to vhca_id
net/mlx5: LAG, use xa_alloc to manage LAG device indices
net/mlx5: LAG, replace pf array with xarray
net/mlx5: Add silent mode set/query and VHCA RX IFC bits
net/mlx5: Add IFC bits for shared headroom pool PBMC support
net/mlx5: Expose TLP emulation capabilities
net/mlx5: Add TLP emulation device capabilities
====================
Link: https://patch.msgid.link/20260317075844.12066-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
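
The centerpiece of the series is converting the fixed `pf[MLX5_MAX_PORTS]` array in `struct mlx5_lag` into an xarray managed with `xa_alloc()`, with the master PF (the one with the lowest `mlx5_get_dev_index()`) tracked by an xarray mark once all LAG ports have paired via devcom. A minimal sketch of that pattern follows; the `demo_*` names and the trimmed struct are illustrative, not from the patches — the xarray calls mirror `mlx5_ldev_add_mdev()` and `mlx5_lag_mark_master()` in the diff below:

```c
/*
 * Sketch only: the caller is expected to have done
 * xa_init_flags(pfs, XA_FLAGS_ALLOC) first, as mlx5_lag_dev_alloc() does.
 */
#include <linux/slab.h>
#include <linux/xarray.h>

#define DEMO_MAX_PORTS 8
/* XA_MARK_0 is reserved by XA_FLAGS_ALLOC for free-slot tracking */
#define DEMO_XA_MARK_MASTER XA_MARK_1

struct demo_pf {
	u32 idx;	/* xarray index handed out by xa_alloc() */
	int dev_idx;	/* stand-in for mlx5_get_dev_index(dev) */
};

static int demo_pf_add(struct xarray *pfs, int dev_idx)
{
	struct demo_pf *pf = kzalloc(sizeof(*pf), GFP_KERNEL);
	int err;

	if (!pf)
		return -ENOMEM;

	/* take the first free slot in [0, DEMO_MAX_PORTS - 1] */
	err = xa_alloc(pfs, &pf->idx, pf,
		       XA_LIMIT(0, DEMO_MAX_PORTS - 1), GFP_KERNEL);
	if (err) {
		kfree(pf);
		return err;
	}
	pf->dev_idx = dev_idx;
	return 0;
}

/*
 * Mark the entry with the lowest device index as master, as
 * mlx5_lag_mark_master() does once all LAG ports have registered.
 */
static void demo_mark_master(struct xarray *pfs)
{
	struct demo_pf *pf, *master = NULL;
	unsigned long idx;

	xa_for_each(pfs, idx, pf)
		if (!master || pf->dev_idx < master->dev_idx)
			master = pf;

	if (master)
		xa_set_mark(pfs, master->idx, DEMO_XA_MARK_MASTER);
}
```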
Diffstat (limited to 'drivers/net')
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 3 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 14 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 103 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 6 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 17 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/fw.c | 6 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c | 3 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 684 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 49 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 20 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 15 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 28 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c | 2 |
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 |
15 files changed, 778 insertions, 182 deletions
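
The series also exports a LAG demux table API — `mlx5_lag_demux_init()`, `mlx5_lag_demux_rule_add()`, `mlx5_lag_demux_rule_del()`, `mlx5_lag_demux_cleanup()` — whose consumer lives on the RDMA side and is not part of this drivers/net diff. A hedged sketch of the intended call sequence; only the signatures come from this diff, the surrounding context (`dev`, `vport_num`, `vport_index`) is assumed:

```c
/* Assumed RDMA-side consumer of the new LAG demux API. */
struct mlx5_flow_table_attr ft_attr = {
	.level	 = 0,
	.prio	 = 0,
	/* eswitch-managed path; the FW-steering path forces max_fte to 1 */
	.max_fte = MLX5_MAX_PORTS,
};
int err;

err = mlx5_lag_demux_init(dev, &ft_attr);
if (err)
	return err;

/* steer traffic tagged with this vport's metadata back to our VHCA RX */
err = mlx5_lag_demux_rule_add(dev, vport_num, vport_index);
if (err) {
	mlx5_lag_demux_cleanup(dev);
	return err;
}

/* ... teardown mirrors setup ... */
mlx5_lag_demux_rule_del(dev, vport_index);
mlx5_lag_demux_cleanup(dev);
```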
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 6d73127b7217..2cf1d3825def 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -282,6 +282,9 @@ const char *parse_fs_dst(struct trace_seq *p, case MLX5_FLOW_DESTINATION_TYPE_NONE: trace_seq_printf(p, "none\n"); break; + case MLX5_FLOW_DESTINATION_TYPE_VHCA_RX: + trace_seq_printf(p, "rx_vhca_id=%u\n", dst->vhca.id); + break; } trace_seq_putc(p, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1434b65d4746..397a93584fd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -35,6 +35,7 @@ #include <net/sch_generic.h> #include <net/pkt_cls.h> #include <linux/mlx5/fs.h> +#include <linux/mlx5/lag.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> #include <linux/refcount.h> @@ -2131,7 +2132,7 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow, mutex_unlock(&esw->offloads.peer_mutex); list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) { - if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev)) + if (peer_index != mlx5_lag_get_dev_seq(peer_flow->priv->mdev)) continue; list_del(&peer_flow->peer_flows); @@ -2154,7 +2155,7 @@ static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow) devcom = flow->priv->mdev->priv.eswitch->devcom; mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) { - i = mlx5_get_dev_index(peer_esw->dev); + i = mlx5_lag_get_dev_seq(peer_esw->dev); mlx5e_tc_del_fdb_peer_flow(flow, i); } } @@ -4584,7 +4585,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; - int i = mlx5_get_dev_index(peer_esw->dev); + int i = mlx5_lag_get_dev_seq(peer_esw->dev); struct mlx5e_rep_priv *peer_urpriv; struct mlx5e_tc_flow *peer_flow; struct mlx5_core_dev *in_mdev; @@ -5525,7 +5526,7 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) devcom = esw->devcom; mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) { - i = mlx5_get_dev_index(peer_esw->dev); + i = mlx5_lag_get_dev_seq(peer_esw->dev); list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i]) mlx5e_tc_del_fdb_peers_flow(flow); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index c2563bee74df..5128f5020dae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -273,7 +273,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *send_to_vport_meta_grp; struct mlx5_flow_group *peer_miss_grp; - struct mlx5_flow_handle **peer_miss_rules[MLX5_MAX_PORTS]; + struct xarray peer_miss_rules; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle **send_to_vport_meta_rules; struct mlx5_flow_handle *miss_rule_uni; @@ -942,6 +942,12 @@ int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev, u16 vport_num); bool mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev); void mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev); +struct mlx5_flow_group * +mlx5_esw_lag_demux_fg_create(struct mlx5_eswitch *esw, + struct 
mlx5_flow_table *ft); +struct mlx5_flow_handle * +mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num, + struct mlx5_flow_table *lag_ft); #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -1027,6 +1033,12 @@ mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id) static inline void mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev) {} +static inline struct mlx5_flow_handle * +mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num, + struct mlx5_flow_table *lag_ft) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7a9ee36b8dca..bcde267df8eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1190,7 +1190,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_flow_handle *flow; struct mlx5_vport *peer_vport; struct mlx5_flow_spec *spec; - int err, pfindex; + int err; unsigned long i; void *misc; @@ -1270,14 +1270,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, } } - pfindex = mlx5_get_dev_index(peer_dev); - if (pfindex >= MLX5_MAX_PORTS) { - esw_warn(esw->dev, "Peer dev index(%d) is over the max num defined(%d)\n", - pfindex, MLX5_MAX_PORTS); - err = -EINVAL; + err = xa_insert(&esw->fdb_table.offloads.peer_miss_rules, + MLX5_CAP_GEN(peer_dev, vhca_id), flows, GFP_KERNEL); + if (err) goto add_ec_vf_flow_err; - } - esw->fdb_table.offloads.peer_miss_rules[pfindex] = flows; kvfree(spec); return 0; @@ -1319,12 +1315,13 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch; - u16 peer_index = mlx5_get_dev_index(peer_dev); + u16 peer_vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); struct mlx5_flow_handle **flows; struct mlx5_vport *peer_vport; unsigned long i; - flows = esw->fdb_table.offloads.peer_miss_rules[peer_index]; + flows = xa_erase(&esw->fdb_table.offloads.peer_miss_rules, + peer_vhca_id); if (!flows) return; @@ -1350,7 +1347,6 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, } kvfree(flows); - esw->fdb_table.offloads.peer_miss_rules[peer_index] = NULL; } static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) @@ -1460,6 +1456,83 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) return flow_rule; } +struct mlx5_flow_group * +mlx5_esw_lag_demux_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + void *match_criteria; + void *flow_group_in; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return ERR_PTR(-EOPNOTSUPP); + + if (IS_ERR(ft)) + return ERR_CAST(ft); + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return ERR_PTR(-ENOMEM); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft->max_fte - 1); + + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + 
mlx5_eswitch_get_vport_metadata_mask()); + + fg = mlx5_create_flow_group(ft, flow_group_in); + kvfree(flow_group_in); + if (IS_ERR(fg)) + esw_warn(esw->dev, "Can't create LAG demux flow group\n"); + + return fg; +} + +struct mlx5_flow_handle * +mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num, + struct mlx5_flow_table *lag_ft) +{ + struct mlx5_flow_spec *spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *ret; + void *misc; + + if (!spec) + return ERR_PTR(-ENOMEM); + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + kvfree(spec); + return ERR_PTR(-EOPNOTSUPP); + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VHCA_RX; + dest.vhca.id = MLX5_CAP_GEN(esw->dev, vhca_id); + + ret = mlx5_add_flow_rules(lag_ft, spec, &flow_act, &dest, 1); + kvfree(spec); + return ret; +} + #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 @@ -2048,7 +2121,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) if (IS_ERR(g)) { err = PTR_ERR(g); - mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err); + esw_warn(esw->dev, "Failed to create vport rx group err %d\n", + err); goto out; } @@ -2093,7 +2167,8 @@ static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw) if (IS_ERR(g)) { err = PTR_ERR(g); - mlx5_core_warn(esw->dev, "Failed to create vport rx drop group err %d\n", err); + esw_warn(esw->dev, + "Failed to create vport rx drop group err %d\n", err); goto out; } @@ -3247,6 +3322,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, return; xa_init(&esw->paired); + xa_init(&esw->fdb_table.offloads.peer_miss_rules); esw->num_peers = 0; esw->devcom = mlx5_devcom_register_component(esw->dev->priv.devc, MLX5_DEVCOM_ESW_OFFLOADS, @@ -3274,6 +3350,7 @@ void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) mlx5_devcom_unregister_component(esw->devcom); xa_destroy(&esw->paired); + xa_destroy(&esw->fdb_table.offloads.peer_miss_rules); esw->devcom = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index c348ee62cd3a..1cd4cd898ec2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -716,6 +716,10 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, id = dst->dest_attr.ft->id; ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE; break; + case MLX5_FLOW_DESTINATION_TYPE_VHCA_RX: + id = dst->dest_attr.vhca.id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VHCA_RX; + break; default: id = dst->dest_attr.tir_num; ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR; @@ -1183,7 +1187,7 @@ int mlx5_fs_cmd_set_l2table_entry_silent(struct mlx5_core_dev *dev, u8 silent_mo { u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {}; - if (silent_mode && !MLX5_CAP_GEN(dev, silent_mode)) + if (silent_mode && !MLX5_CAP_GEN(dev, silent_mode_set)) return -EOPNOTSUPP; MLX5_SET(set_l2_table_entry_in, in, opcode, 
MLX5_CMD_OP_SET_L2_TABLE_ENTRY); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2c3544880a30..61a6ba1e49dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -503,7 +503,8 @@ static bool is_fwd_dest_type(enum mlx5_flow_destination_type type) type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER || type == MLX5_FLOW_DESTINATION_TYPE_TIR || type == MLX5_FLOW_DESTINATION_TYPE_RANGE || - type == MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + type == MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE || + type == MLX5_FLOW_DESTINATION_TYPE_VHCA_RX; } static bool check_valid_spec(const struct mlx5_flow_spec *spec) @@ -1437,15 +1438,9 @@ mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table* mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns, - int prio, u32 level) + struct mlx5_flow_table_attr *ft_attr) { - struct mlx5_flow_table_attr ft_attr = {}; - - ft_attr.level = level; - ft_attr.prio = prio; - ft_attr.max_fte = 1; - - return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0); + return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0); } EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); @@ -1890,7 +1885,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, d1->range.hit_ft == d2->range.hit_ft && d1->range.miss_ft == d2->range.miss_ft && d1->range.min == d2->range.min && - d1->range.max == d2->range.max)) + d1->range.max == d2->range.max) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_VHCA_RX && + d1->vhca.id == d2->vhca.id)) return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index eeb4437975f2..55249f405841 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -255,6 +255,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, tlp_device_emulation_manager)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_TLP_EMULATION, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + if (MLX5_CAP_GEN(dev, ipsec_offload)) { err = mlx5_core_get_caps_mode(dev, MLX5_CAP_IPSEC, HCA_CAP_OPMOD_GET_CUR); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c index 62b6faa4276a..37de4be0e620 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -145,7 +145,8 @@ static int members_show(struct seq_file *file, void *priv) ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); mlx5_ldev_for_each(i, 0, ldev) - seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device)); + seq_printf(file, "%s\n", + dev_name(mlx5_lag_pf(ldev, i)->dev->device)); mutex_unlock(&ldev->lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 044adfdf9aa2..449e4bd86c06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -35,6 +35,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/eswitch.h> #include <linux/mlx5/vport.h> +#include <linux/mlx5/lag.h> #include "lib/mlx5.h" #include "lib/devcom.h" #include "mlx5_core.h" @@ -232,6 +233,7 @@ static void mlx5_do_bond_work(struct work_struct *work); static void mlx5_ldev_free(struct kref *ref) { struct mlx5_lag *ldev = 
container_of(ref, struct mlx5_lag, ref); + struct lag_func *pf; struct net *net; int i; @@ -241,13 +243,16 @@ static void mlx5_ldev_free(struct kref *ref) } mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].dev && - ldev->pf[i].port_change_nb.nb.notifier_call) { - struct mlx5_nb *nb = &ldev->pf[i].port_change_nb; + pf = mlx5_lag_pf(ldev, i); + if (pf->port_change_nb.nb.notifier_call) { + struct mlx5_nb *nb = &pf->port_change_nb; - mlx5_eq_notifier_unregister(ldev->pf[i].dev, nb); + mlx5_eq_notifier_unregister(pf->dev, nb); } + xa_erase(&ldev->pfs, i); + kfree(pf); } + xa_destroy(&ldev->pfs); mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); @@ -284,6 +289,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) kref_init(&ldev->ref); mutex_init(&ldev->lock); + xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC); INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work); @@ -309,23 +315,54 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, struct net_device *ndev) { + struct lag_func *pf; int i; - mlx5_ldev_for_each(i, 0, ldev) - if (ldev->pf[i].netdev == ndev) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (pf->netdev == ndev) return i; + } return -ENOENT; } +static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev) +{ + unsigned long idx = 0; + void *entry; + + if (!ldev) + return -ENOENT; + + entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER); + if (!entry) + return -ENOENT; + + return (int)idx; +} + int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq) { - int i, num = 0; + int master_idx, i, num = 0; if (!ldev) return -ENOENT; + master_idx = mlx5_lag_get_master_idx(ldev); + + /* If seq 0 is requested and there's a primary PF, return it */ + if (master_idx >= 0) { + if (seq == 0) + return master_idx; + num++; + } + mlx5_ldev_for_each(i, 0, ldev) { + /* Skip the primary PF in the loop */ + if (i == master_idx) + continue; + if (num == seq) return i; num++; @@ -333,6 +370,108 @@ int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq) return -ENOENT; } +/* Reverse of mlx5_lag_get_dev_index_by_seq: given a device, return its + * sequence number in the LAG. Master is always 0, others numbered + * sequentially starting from 1. 
+ */ +int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev = mlx5_lag_dev(dev); + int master_idx, i, num = 1; + struct lag_func *pf; + + if (!ldev) + return -ENOENT; + + master_idx = mlx5_lag_get_master_idx(ldev); + if (master_idx < 0) + return -ENOENT; + + pf = mlx5_lag_pf(ldev, master_idx); + if (pf && pf->dev == dev) + return 0; + + mlx5_ldev_for_each(i, 0, ldev) { + if (i == master_idx) + continue; + pf = mlx5_lag_pf(ldev, i); + if (pf->dev == dev) + return num; + num++; + } + return -ENOENT; +} +EXPORT_SYMBOL(mlx5_lag_get_dev_seq); + +/* Devcom events for LAG master marking */ +#define LAG_DEVCOM_PAIR (0) +#define LAG_DEVCOM_UNPAIR (1) + +static void mlx5_lag_mark_master(struct mlx5_lag *ldev) +{ + int lowest_dev_idx = INT_MAX; + struct lag_func *pf; + int master_xa_idx = -1; + int dev_idx; + int i; + + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + dev_idx = mlx5_get_dev_index(pf->dev); + if (dev_idx < lowest_dev_idx) { + lowest_dev_idx = dev_idx; + master_xa_idx = i; + } + } + + if (master_xa_idx >= 0) + xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER); +} + +static void mlx5_lag_clear_master(struct mlx5_lag *ldev) +{ + unsigned long idx = 0; + void *entry; + + entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER); + if (!entry) + return; + + xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER); +} + +/* Devcom event handler to manage LAG master marking */ +static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data) +{ + struct mlx5_core_dev *dev = my_data; + struct mlx5_lag *ldev; + int idx; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return 0; + + mutex_lock(&ldev->lock); + switch (event) { + case LAG_DEVCOM_PAIR: + /* No need to mark more than once */ + idx = mlx5_lag_get_master_idx(ldev); + if (idx >= 0) + break; + /* Check if all LAG ports are now registered */ + if (mlx5_lag_num_devs(ldev) == ldev->ports) + mlx5_lag_mark_master(ldev); + break; + + case LAG_DEVCOM_UNPAIR: + /* Clear master mark when a device is removed */ + mlx5_lag_clear_master(ldev); + break; + } + mutex_unlock(&ldev->lock); + return 0; +} + int mlx5_lag_num_devs(struct mlx5_lag *ldev) { int i, num = 0; @@ -349,14 +488,17 @@ int mlx5_lag_num_devs(struct mlx5_lag *ldev) int mlx5_lag_num_netdevs(struct mlx5_lag *ldev) { + struct lag_func *pf; int i, num = 0; if (!ldev) return 0; - mlx5_ldev_for_each(i, 0, ldev) - if (ldev->pf[i].netdev) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (pf->netdev) num++; + } return num; } @@ -400,11 +542,12 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, /* Use native mapping by default where each port's buckets * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc + * ports[] values are 1-indexed device indices for FW. 
*/ mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < buckets; j++) { idx = i * buckets + j; - ports[idx] = i + 1; + ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1; } } @@ -416,33 +559,42 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, /* Go over the disabled ports and for each assign a random active port */ for (i = 0; i < disabled_ports_num; i++) { for (j = 0; j < buckets; j++) { + int rand_xa_idx; + get_random_bytes(&rand, 4); - ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1; + rand_xa_idx = enabled[rand % enabled_ports_num]; + ports[disabled[i] * buckets + j] = + mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1; } } } static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) { + struct lag_func *pf; int i; - mlx5_ldev_for_each(i, 0, ldev) - if (ldev->pf[i].has_drop) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (pf->has_drop) return true; + } return false; } static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) { + struct lag_func *pf; int i; mlx5_ldev_for_each(i, 0, ldev) { - if (!ldev->pf[i].has_drop) + pf = mlx5_lag_pf(ldev, i); + if (!pf->has_drop) continue; - mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch, + mlx5_esw_acl_ingress_vport_drop_rule_destroy(pf->dev->priv.eswitch, MLX5_VPORT_UPLINK); - ldev->pf[i].has_drop = false; + pf->has_drop = false; } } @@ -451,6 +603,7 @@ static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, { u8 disabled_ports[MLX5_MAX_PORTS] = {}; struct mlx5_core_dev *dev; + struct lag_func *pf; int disabled_index; int num_disabled; int err; @@ -468,11 +621,12 @@ static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, for (i = 0; i < num_disabled; i++) { disabled_index = disabled_ports[i]; - dev = ldev->pf[disabled_index].dev; + pf = mlx5_lag_pf(ldev, disabled_index); + dev = pf->dev; err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch, MLX5_VPORT_UPLINK); if (!err) - ldev->pf[disabled_index].has_drop = true; + pf->has_drop = true; else mlx5_core_err(dev, "Failed to create lag drop rule, error: %d", err); @@ -504,7 +658,7 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) if (idx < 0) return -EINVAL; - dev0 = ldev->pf[idx].dev; + dev0 = mlx5_lag_pf(ldev, idx)->dev; if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { ret = mlx5_lag_port_sel_modify(ldev, ports); if (ret || @@ -521,6 +675,7 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev) { struct net_device *ndev = NULL; + struct lag_func *pf; struct mlx5_lag *ldev; unsigned long flags; int i, last_idx; @@ -531,14 +686,17 @@ static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev if (!ldev) goto unlock; - mlx5_ldev_for_each(i, 0, ldev) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); if (ldev->tracker.netdev_state[i].tx_enabled) - ndev = ldev->pf[i].netdev; + ndev = pf->netdev; + } if (!ndev) { last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1); if (last_idx < 0) goto unlock; - ndev = ldev->pf[last_idx].netdev; + pf = mlx5_lag_pf(ldev, last_idx); + ndev = pf->netdev; } dev_hold(ndev); @@ -563,7 +721,7 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, if (first_idx < 0) return; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, first_idx)->dev; mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports); mlx5_ldev_for_each(i, 0, ldev) { @@ -615,7 +773,7 @@ static int 
mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev, mode == MLX5_LAG_MODE_MULTIPATH) return 0; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, first_idx)->dev; if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) { if (ldev->ports > 2) @@ -661,19 +819,24 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) { - int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_eswitch *master_esw; struct mlx5_core_dev *dev0; int i, j; int err; - if (first_idx < 0) + if (master_idx < 0) return -EINVAL; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, master_idx)->dev; master_esw = dev0->priv.eswitch; - mlx5_ldev_for_each(i, first_idx + 1, ldev) { - struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch; + mlx5_ldev_for_each(i, 0, ldev) { + struct mlx5_eswitch *slave_esw; + + if (i == master_idx) + continue; + + slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch; err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw, slave_esw, ldev->ports); @@ -682,9 +845,12 @@ static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) } return 0; err: - mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev) + mlx5_ldev_for_each_reverse(j, i, 0, ldev) { + if (j == master_idx) + continue; mlx5_eswitch_offloads_single_fdb_del_one(master_esw, - ldev->pf[j].dev->priv.eswitch); + mlx5_lag_pf(ldev, j)->dev->priv.eswitch); + } return err; } @@ -693,8 +859,8 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, unsigned long flags) { - bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; struct mlx5_core_dev *dev0; int err; @@ -702,7 +868,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, if (first_idx < 0) return -EINVAL; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, first_idx)->dev; if (tracker) mlx5_lag_print_mapping(dev0, ldev, tracker, flags); mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", @@ -740,16 +906,17 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, bool shared_fdb) { - int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); bool roce_lag = mode == MLX5_LAG_MODE_ROCE; struct mlx5_core_dev *dev0; unsigned long flags = 0; + int master_idx; int err; - if (first_idx < 0) + master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + if (master_idx < 0) return -EINVAL; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, master_idx)->dev; err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags); if (err) return err; @@ -793,7 +960,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, int mlx5_deactivate_lag(struct mlx5_lag *ldev) { - int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; bool roce_lag = __mlx5_lag_is_roce(ldev); unsigned long flags = ldev->mode_flags; @@ -802,19 +969,22 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) int err; int i; - if (first_idx < 0) + if (master_idx < 0) return -EINVAL; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, master_idx)->dev; master_esw = dev0->priv.eswitch; ldev->mode = MLX5_LAG_MODE_NONE; ldev->mode_flags = 0; 
mlx5_lag_mp_reset(ldev); if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { - mlx5_ldev_for_each(i, first_idx + 1, ldev) + mlx5_ldev_for_each(i, 0, ldev) { + if (i == master_idx) + continue; mlx5_eswitch_offloads_single_fdb_del_one(master_esw, - ldev->pf[i].dev->priv.eswitch); + mlx5_lag_pf(ldev, i)->dev->priv.eswitch); + } clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); } @@ -844,68 +1014,82 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { - int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); #ifdef CONFIG_MLX5_ESWITCH struct mlx5_core_dev *dev; u8 mode; #endif + struct lag_func *pf; bool roce_support; int i; - if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports) + if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports) return false; #ifdef CONFIG_MLX5_ESWITCH mlx5_ldev_for_each(i, 0, ldev) { - dev = ldev->pf[i].dev; + pf = mlx5_lag_pf(ldev, i); + dev = pf->dev; if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) return false; } - dev = ldev->pf[first_idx].dev; + pf = mlx5_lag_pf(ldev, master_idx); + dev = pf->dev; mode = mlx5_eswitch_mode(dev); - mlx5_ldev_for_each(i, 0, ldev) - if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (mlx5_eswitch_mode(pf->dev) != mode) return false; + } #else - mlx5_ldev_for_each(i, 0, ldev) - if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (mlx5_sriov_is_enabled(pf->dev)) return false; + } #endif - roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev); - mlx5_ldev_for_each(i, first_idx + 1, ldev) - if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support) + pf = mlx5_lag_pf(ldev, master_idx); + roce_support = mlx5_get_roce_state(pf->dev); + mlx5_ldev_for_each(i, 0, ldev) { + if (i == master_idx) + continue; + pf = mlx5_lag_pf(ldev, i); + if (mlx5_get_roce_state(pf->dev) != roce_support) return false; + } return true; } void mlx5_lag_add_devices(struct mlx5_lag *ldev) { + struct lag_func *pf; int i; mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].dev->priv.flags & - MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + pf = mlx5_lag_pf(ldev, i); + if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) continue; - ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; - mlx5_rescan_drivers_locked(ldev->pf[i].dev); + pf->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(pf->dev); } } void mlx5_lag_remove_devices(struct mlx5_lag *ldev) { + struct lag_func *pf; int i; mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].dev->priv.flags & - MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + pf = mlx5_lag_pf(ldev, i); + if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) continue; - ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; - mlx5_rescan_drivers_locked(ldev->pf[i].dev); + pf->dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(pf->dev); } } @@ -921,7 +1105,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) if (idx < 0) return; - dev0 = ldev->pf[idx].dev; + dev0 = mlx5_lag_pf(ldev, idx)->dev; roce_lag = __mlx5_lag_is_roce(ldev); if (shared_fdb) { @@ -931,8 +1115,11 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); } - mlx5_ldev_for_each(i, idx + 1, ldev) - 
mlx5_nic_vport_disable_roce(ldev->pf[i].dev); + mlx5_ldev_for_each(i, 0, ldev) { + if (i == idx) + continue; + mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev); + } } err = mlx5_deactivate_lag(ldev); @@ -944,21 +1131,25 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) if (shared_fdb) mlx5_ldev_for_each(i, 0, ldev) - if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) - mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + if (!(mlx5_lag_pf(ldev, i)->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); } bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) { - int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_core_dev *dev; + bool ret = false; + int idx; int i; + idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); if (idx < 0) return false; - mlx5_ldev_for_each(i, idx + 1, ldev) { - dev = ldev->pf[i].dev; + mlx5_ldev_for_each(i, 0, ldev) { + if (i == idx) + continue; + dev = mlx5_lag_pf(ldev, i)->dev; if (is_mdev_switchdev_mode(dev) && mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && MLX5_CAP_GEN(dev, lag_native_fdb_selection) && @@ -969,28 +1160,33 @@ bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) return false; } - dev = ldev->pf[idx].dev; + dev = mlx5_lag_pf(ldev, idx)->dev; if (is_mdev_switchdev_mode(dev) && mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) && MLX5_CAP_ESW(dev, esw_shared_ingress_acl) && mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1) - return true; + ret = true; - return false; + return ret; } static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) { bool roce_lag = true; + struct lag_func *pf; int i; - mlx5_ldev_for_each(i, 0, ldev) - roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev); + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + roce_lag = roce_lag && !mlx5_sriov_is_enabled(pf->dev); + } #ifdef CONFIG_MLX5_ESWITCH - mlx5_ldev_for_each(i, 0, ldev) - roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev); + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + roce_lag = roce_lag && is_mdev_legacy_mode(pf->dev); + } #endif return roce_lag; @@ -1014,13 +1210,17 @@ mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed, int (*get_speed)(struct mlx5_core_dev *, u32 *)) { struct mlx5_core_dev *pf_mdev; + struct lag_func *pf; int pf_idx; u32 speed; int ret; *sum_speed = 0; mlx5_ldev_for_each(pf_idx, 0, ldev) { - pf_mdev = ldev->pf[pf_idx].dev; + pf = mlx5_lag_pf(ldev, pf_idx); + if (!pf) + continue; + pf_mdev = pf->dev; if (!pf_mdev) continue; @@ -1086,6 +1286,7 @@ static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev, void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev) { struct mlx5_core_dev *mdev; + struct lag_func *pf; u32 speed; int pf_idx; @@ -1105,7 +1306,10 @@ void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev) speed = speed / MLX5_MAX_TX_SPEED_UNIT; mlx5_ldev_for_each(pf_idx, 0, ldev) { - mdev = ldev->pf[pf_idx].dev; + pf = mlx5_lag_pf(ldev, pf_idx); + if (!pf) + continue; + mdev = pf->dev; if (!mdev) continue; @@ -1116,12 +1320,16 @@ void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev) void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev) { struct mlx5_core_dev *mdev; + struct lag_func *pf; u32 speed; int pf_idx; int ret; mlx5_ldev_for_each(pf_idx, 0, ldev) { - mdev = ldev->pf[pf_idx].dev; + pf = 
mlx5_lag_pf(ldev, pf_idx); + if (!pf) + continue; + mdev = pf->dev; if (!mdev) continue; @@ -1152,7 +1360,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) if (idx < 0) return; - dev0 = ldev->pf[idx].dev; + dev0 = mlx5_lag_pf(ldev, idx)->dev; if (!mlx5_lag_is_ready(ldev)) { do_bond = false; } else { @@ -1182,16 +1390,23 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_lag_add_devices(ldev); if (shared_fdb) { mlx5_ldev_for_each(i, 0, ldev) - mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); } return; - } else if (roce_lag) { + } + + if (roce_lag) { + struct mlx5_core_dev *dev; + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - mlx5_ldev_for_each(i, idx + 1, ldev) { - if (mlx5_get_roce_state(ldev->pf[i].dev)) - mlx5_nic_vport_enable_roce(ldev->pf[i].dev); + mlx5_ldev_for_each(i, 0, ldev) { + if (i == idx) + continue; + dev = mlx5_lag_pf(ldev, i)->dev; + if (mlx5_get_roce_state(dev)) + mlx5_nic_vport_enable_roce(dev); } } else if (shared_fdb) { int i; @@ -1200,7 +1415,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_rescan_drivers_locked(dev0); mlx5_ldev_for_each(i, 0, ldev) { - err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + err = mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); if (err) break; } @@ -1211,7 +1426,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_deactivate_lag(ldev); mlx5_lag_add_devices(ldev); mlx5_ldev_for_each(i, 0, ldev) - mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); mlx5_core_err(dev0, "Failed to enable lag\n"); return; } @@ -1243,16 +1458,171 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev) { struct mlx5_devcom_comp_dev *devcom = NULL; + struct lag_func *pf; int i; mutex_lock(&ldev->lock); i = mlx5_get_next_ldev_func(ldev, 0); - if (i < MLX5_MAX_PORTS) - devcom = ldev->pf[i].dev->priv.hca_devcom_comp; + if (i < MLX5_MAX_PORTS) { + pf = mlx5_lag_pf(ldev, i); + devcom = pf->dev->priv.hca_devcom_comp; + } mutex_unlock(&ldev->lock); return devcom; } +static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_lag *ldev) +{ +#ifdef CONFIG_MLX5_ESWITCH + struct mlx5_flow_namespace *ns; + struct mlx5_flow_group *fg; + int err; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG); + if (!ns) + return 0; + + ldev->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr); + if (IS_ERR(ldev->lag_demux_ft)) + return PTR_ERR(ldev->lag_demux_ft); + + fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch, + ldev->lag_demux_ft); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + mlx5_destroy_flow_table(ldev->lag_demux_ft); + ldev->lag_demux_ft = NULL; + return err; + } + + ldev->lag_demux_fg = fg; + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_lag *ldev) +{ + struct mlx5_flow_namespace *ns; + int err; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG); + if (!ns) + return 0; + + ldev->lag_demux_fg = NULL; + ft_attr->max_fte = 1; + ldev->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr); + if (IS_ERR(ldev->lag_demux_ft)) { + err = PTR_ERR(ldev->lag_demux_ft); + ldev->lag_demux_ft = NULL; + return err; + } + + return 0; +} + +int 
mlx5_lag_demux_init(struct mlx5_core_dev *dev, + struct mlx5_flow_table_attr *ft_attr) +{ + struct mlx5_lag *ldev; + + if (!ft_attr) + return -EINVAL; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return -ENODEV; + + xa_init(&ldev->lag_demux_rules); + + if (mlx5_get_sd(dev)) + return mlx5_lag_demux_ft_fg_init(dev, ft_attr, ldev); + + return mlx5_lag_demux_fw_init(dev, ft_attr, ldev); +} +EXPORT_SYMBOL(mlx5_lag_demux_init); + +void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_handle *rule; + struct mlx5_lag *ldev; + unsigned long vport_num; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + xa_for_each(&ldev->lag_demux_rules, vport_num, rule) + mlx5_del_flow_rules(rule); + xa_destroy(&ldev->lag_demux_rules); + + if (ldev->lag_demux_fg) + mlx5_destroy_flow_group(ldev->lag_demux_fg); + if (ldev->lag_demux_ft) + mlx5_destroy_flow_table(ldev->lag_demux_ft); + ldev->lag_demux_fg = NULL; + ldev->lag_demux_ft = NULL; +} +EXPORT_SYMBOL(mlx5_lag_demux_cleanup); + +int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num, + int index) +{ + struct mlx5_flow_handle *rule; + struct mlx5_lag *ldev; + int err; + + ldev = mlx5_lag_dev(vport_dev); + if (!ldev || !ldev->lag_demux_fg) + return 0; + + if (xa_load(&ldev->lag_demux_rules, index)) + return 0; + + rule = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch, + vport_num, ldev->lag_demux_ft); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(vport_dev, + "Failed to create LAG demux rule for vport %u, err %d\n", + vport_num, err); + return err; + } + + err = xa_err(xa_store(&ldev->lag_demux_rules, index, rule, + GFP_KERNEL)); + if (err) { + mlx5_del_flow_rules(rule); + mlx5_core_warn(vport_dev, + "Failed to store LAG demux rule for vport %u, err %d\n", + vport_num, err); + } + + return err; +} +EXPORT_SYMBOL(mlx5_lag_demux_rule_add); + +void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index) +{ + struct mlx5_flow_handle *rule; + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev || !ldev->lag_demux_fg) + return; + + rule = xa_erase(&ldev->lag_demux_rules, index); + if (rule) + mlx5_del_flow_rules(rule); +} +EXPORT_SYMBOL(mlx5_lag_demux_rule_del); + static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) { queue_delayed_work(ldev->wq, &ldev->bond_work, delay); @@ -1297,6 +1667,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, struct netdev_lag_upper_info *lag_upper_info = NULL; bool is_bonded, is_in_lag, mode_supported; bool has_inactive = 0; + struct lag_func *pf; struct slave *slave; u8 bond_status = 0; int num_slaves = 0; @@ -1317,7 +1688,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, rcu_read_lock(); for_each_netdev_in_bond_rcu(upper, ndev_tmp) { mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].netdev == ndev_tmp) { + pf = mlx5_lag_pf(ldev, i); + if (pf->netdev == ndev_tmp) { idx++; break; } @@ -1537,59 +1909,92 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev, struct net_device *netdev) { - unsigned int fn = mlx5_get_dev_index(dev); + struct lag_func *pf; unsigned long flags; + int i; spin_lock_irqsave(&lag_lock, flags); - ldev->pf[fn].netdev = netdev; - ldev->tracker.netdev_state[fn].link_up = 0; - ldev->tracker.netdev_state[fn].tx_enabled = 0; + /* Find pf entry by matching dev pointer */ + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (pf->dev == dev) { + pf->netdev = netdev; + ldev->tracker.netdev_state[i].link_up = 0; + 
ldev->tracker.netdev_state[i].tx_enabled = 0; + break; + } + } spin_unlock_irqrestore(&lag_lock, flags); } static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, struct net_device *netdev) { + struct lag_func *pf; unsigned long flags; int i; spin_lock_irqsave(&lag_lock, flags); mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].netdev == netdev) { - ldev->pf[i].netdev = NULL; + pf = mlx5_lag_pf(ldev, i); + if (pf->netdev == netdev) { + pf->netdev = NULL; break; } } spin_unlock_irqrestore(&lag_lock, flags); } -static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, +static int mlx5_ldev_add_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { - unsigned int fn = mlx5_get_dev_index(dev); + struct lag_func *pf; + u32 idx; + int err; + + pf = kzalloc_obj(*pf); + if (!pf) + return -ENOMEM; + + err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1), + GFP_KERNEL); + if (err) { + kfree(pf); + return err; + } - ldev->pf[fn].dev = dev; + pf->idx = idx; + pf->dev = dev; dev->priv.lag = ldev; - MLX5_NB_INIT(&ldev->pf[fn].port_change_nb, + MLX5_NB_INIT(&pf->port_change_nb, mlx5_lag_mpesw_port_change_event, PORT_CHANGE); - mlx5_eq_notifier_register(dev, &ldev->pf[fn].port_change_nb); + mlx5_eq_notifier_register(dev, &pf->port_change_nb); + + return 0; } static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { - int fn; + struct lag_func *pf; + int i; - fn = mlx5_get_dev_index(dev); - if (ldev->pf[fn].dev != dev) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (pf->dev == dev) + break; + } + if (i >= MLX5_MAX_PORTS) return; - if (ldev->pf[fn].port_change_nb.nb.notifier_call) - mlx5_eq_notifier_unregister(dev, &ldev->pf[fn].port_change_nb); + if (pf->port_change_nb.nb.notifier_call) + mlx5_eq_notifier_unregister(dev, &pf->port_change_nb); - ldev->pf[fn].dev = NULL; + pf->dev = NULL; dev->priv.lag = NULL; + xa_erase(&ldev->pfs, pf->idx); + kfree(pf); } /* Must be called with HCA devcom component lock held */ @@ -1598,6 +2003,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) struct mlx5_devcom_comp_dev *pos = NULL; struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; + int err; tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos); if (tmp_dev) @@ -1609,7 +2015,12 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) mlx5_core_err(dev, "Failed to alloc lag dev\n"); return 0; } - mlx5_ldev_add_mdev(ldev, dev); + err = mlx5_ldev_add_mdev(ldev, dev); + if (err) { + mlx5_core_err(dev, "Failed to add mdev to lag dev\n"); + mlx5_ldev_put(ldev); + return 0; + } return 0; } @@ -1619,7 +2030,12 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) return -EAGAIN; } mlx5_ldev_get(ldev); - mlx5_ldev_add_mdev(ldev, dev); + err = mlx5_ldev_add_mdev(ldev, dev); + if (err) { + mlx5_ldev_put(ldev); + mutex_unlock(&ldev->lock); + return err; + } mutex_unlock(&ldev->lock); return 0; @@ -1647,7 +2063,8 @@ static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev) dev->priv.hca_devcom_comp = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS, - &attr, NULL, dev); + &attr, mlx5_lag_devcom_event, + dev); if (!dev->priv.hca_devcom_comp) { mlx5_core_err(dev, "Failed to register devcom HCA component."); @@ -1678,6 +2095,9 @@ recheck: } mlx5_ldev_remove_mdev(ldev, dev); mutex_unlock(&ldev->lock); + /* Send devcom event to notify peers that a device is being removed */ + mlx5_devcom_send_event(dev->priv.hca_devcom_comp, + LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev); 
mlx5_lag_unregister_hca_devcom_comp(dev); mlx5_ldev_put(ldev); } @@ -1701,6 +2121,9 @@ recheck: msleep(100); goto recheck; } + /* Send devcom event to notify peers that a device was added */ + mlx5_devcom_send_event(dev->priv.hca_devcom_comp, + LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev); mlx5_ldev_add_debugfs(dev); } @@ -1746,21 +2169,25 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx) { + struct lag_func *pf; int i; - for (i = start_idx; i >= end_idx; i--) - if (ldev->pf[i].dev) + for (i = start_idx; i >= end_idx; i--) { + pf = xa_load(&ldev->pfs, i); + if (pf && pf->dev) return i; + } return -1; } int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx) { - int i; + struct lag_func *pf; + unsigned long idx; - for (i = start_idx; i < MLX5_MAX_PORTS; i++) - if (ldev->pf[i].dev) - return i; + xa_for_each_start(&ldev->pfs, idx, pf, start_idx) + if (pf->dev) + return idx; return MLX5_MAX_PORTS; } @@ -1814,13 +2241,17 @@ bool mlx5_lag_is_master(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; unsigned long flags; + struct lag_func *pf; bool res = false; int idx; spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); - res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev; + if (ldev && __mlx5_lag_is_active(ldev) && idx >= 0) { + pf = mlx5_lag_pf(ldev, idx); + res = pf && dev == pf->dev; + } spin_unlock_irqrestore(&lag_lock, flags); return res; @@ -1899,6 +2330,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, { struct mlx5_lag *ldev; unsigned long flags; + struct lag_func *pf; u8 port = 0; int i; @@ -1908,7 +2340,8 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, goto unlock; mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].netdev == slave) { + pf = mlx5_lag_pf(ldev, i); + if (pf->netdev == slave) { port = i; break; } @@ -1939,6 +2372,7 @@ struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int struct mlx5_core_dev *peer_dev = NULL; struct mlx5_lag *ldev; unsigned long flags; + struct lag_func *pf; int idx; spin_lock_irqsave(&lag_lock, flags); @@ -1948,9 +2382,11 @@ struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int if (*i == MLX5_MAX_PORTS) goto unlock; - mlx5_ldev_for_each(idx, *i, ldev) - if (ldev->pf[idx].dev != dev) + mlx5_ldev_for_each(idx, *i, ldev) { + pf = mlx5_lag_pf(ldev, idx); + if (pf->dev != dev) break; + } if (idx == MLX5_MAX_PORTS) { *i = idx; @@ -1958,7 +2394,8 @@ struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int } *i = idx + 1; - peer_dev = ldev->pf[idx].dev; + pf = mlx5_lag_pf(ldev, idx); + peer_dev = pf->dev; unlock: spin_unlock_irqrestore(&lag_lock, flags); @@ -1976,6 +2413,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, int ret = 0, i, j, idx = 0; struct mlx5_lag *ldev; unsigned long flags; + struct lag_func *pf; int num_ports; void *out; @@ -1995,8 +2433,10 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ldev = mlx5_lag_dev(dev); if (ldev && __mlx5_lag_is_active(ldev)) { num_ports = ldev->ports; - mlx5_ldev_for_each(i, 0, ldev) - mdev[idx++] = ldev->pf[i].dev; + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + mdev[idx++] = pf->dev; + } } else { num_ports = 1; mdev[MLX5_LAG_P1] = dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index be1afece5fdc..6c911374f409 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -5,8 +5,17 @@ #define __MLX5_LAG_H__ #include <linux/debugfs.h> +#include <linux/errno.h> +#include <linux/xarray.h> +#include <linux/mlx5/fs.h> #define MLX5_LAG_MAX_HASH_BUCKETS 16 +/* XArray mark for the LAG master device + * (device with lowest mlx5_get_dev_index). + * Note: XA_MARK_0 is reserved by XA_FLAGS_ALLOC for free-slot tracking. + */ +#define MLX5_LAG_XA_MARK_MASTER XA_MARK_1 + #include "mlx5_core.h" #include "mp.h" #include "port_sel.h" @@ -39,6 +48,7 @@ struct lag_func { struct mlx5_core_dev *dev; struct net_device *netdev; bool has_drop; + unsigned int idx; /* xarray index assigned by LAG */ struct mlx5_nb port_change_nb; }; @@ -64,7 +74,7 @@ struct mlx5_lag { int mode_changes_in_progress; u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; struct kref ref; - struct lag_func pf[MLX5_MAX_PORTS]; + struct xarray pfs; struct lag_tracker tracker; struct workqueue_struct *wq; struct delayed_work bond_work; @@ -76,6 +86,9 @@ struct mlx5_lag { /* Protect lag fields/state changes */ struct mutex lock; struct lag_mpesw lag_mpesw; + struct mlx5_flow_table *lag_demux_ft; + struct mlx5_flow_group *lag_demux_fg; + struct xarray lag_demux_rules; }; static inline struct mlx5_lag * @@ -84,6 +97,34 @@ mlx5_lag_dev(struct mlx5_core_dev *dev) return dev->priv.lag; } +static inline struct lag_func * +mlx5_lag_pf(struct mlx5_lag *ldev, unsigned int idx) +{ + return xa_load(&ldev->pfs, idx); +} + +/* Get device index (mlx5_get_dev_index) from xarray index */ +static inline int mlx5_lag_xa_to_dev_idx(struct mlx5_lag *ldev, int xa_idx) +{ + struct lag_func *pf = mlx5_lag_pf(ldev, xa_idx); + + return pf ? mlx5_get_dev_index(pf->dev) : -ENOENT; +} + +/* Find lag_func by device index (reverse lookup from mlx5_get_dev_index) */ +static inline struct lag_func * +mlx5_lag_pf_by_dev_idx(struct mlx5_lag *ldev, int dev_idx) +{ + struct lag_func *pf; + unsigned long idx; + + xa_for_each(&ldev->pfs, idx, pf) { + if (mlx5_get_dev_index(pf->dev) == dev_idx) + return pf; + } + return NULL; +} + static inline bool __mlx5_lag_is_active(struct mlx5_lag *ldev) { @@ -98,6 +139,12 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev) bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev); bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); +int mlx5_lag_demux_init(struct mlx5_core_dev *dev, + struct mlx5_flow_table_attr *ft_attr); +void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev); +int mlx5_lag_demux_rule_add(struct mlx5_core_dev *dev, u16 vport_num, + int vport_index); +void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int vport_index); void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); int mlx5_activate_lag(struct mlx5_lag *ldev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index c4c2bf33ef35..f42e051fa7e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -29,8 +29,8 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS) return false; - return mlx5_esw_multipath_prereq(ldev->pf[idx0].dev, - ldev->pf[idx1].dev); + return mlx5_esw_multipath_prereq(mlx5_lag_pf(ldev, idx0)->dev, + mlx5_lag_pf(ldev, idx1)->dev); } bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) @@ -80,18 +80,18 @@ static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, 
tracker.netdev_state[idx1].link_up = true; break; default: - mlx5_core_warn(ldev->pf[idx0].dev, + mlx5_core_warn(mlx5_lag_pf(ldev, idx0)->dev, "Invalid affinity port %d", port); return; } if (tracker.netdev_state[idx0].tx_enabled) - mlx5_notifier_call_chain(ldev->pf[idx0].dev->priv.events, + mlx5_notifier_call_chain(mlx5_lag_pf(ldev, idx0)->dev->priv.events, MLX5_DEV_EVENT_PORT_AFFINITY, (void *)0); if (tracker.netdev_state[idx1].tx_enabled) - mlx5_notifier_call_chain(ldev->pf[idx1].dev->priv.events, + mlx5_notifier_call_chain(mlx5_lag_pf(ldev, idx1)->dev->priv.events, MLX5_DEV_EVENT_PORT_AFFINITY, (void *)0); @@ -146,7 +146,7 @@ mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev, fib_dev = fib_info_nh(fi, i)->fib_nh_dev; ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev); if (ldev_idx >= 0) - return ldev->pf[ldev_idx].netdev; + return mlx5_lag_pf(ldev, ldev_idx)->netdev; } return NULL; @@ -178,7 +178,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, mp->fib.dst_len <= fen_info->dst_len && !(mp->fib.dst_len == fen_info->dst_len && fi->fib_priority < mp->fib.priority)) { - mlx5_core_dbg(ldev->pf[idx].dev, + mlx5_core_dbg(mlx5_lag_pf(ldev, idx)->dev, "Multipath entry with lower priority was rejected\n"); return; } @@ -194,7 +194,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, } if (nh_dev0 == nh_dev1) { - mlx5_core_warn(ldev->pf[idx].dev, + mlx5_core_warn(mlx5_lag_pf(ldev, idx)->dev, "Multipath offload doesn't support routes with multiple nexthops of the same device"); return; } @@ -203,7 +203,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, if (__mlx5_lag_is_active(ldev)) { mlx5_ldev_for_each(i, 0, ldev) { dev_idx++; - if (ldev->pf[i].netdev == nh_dev0) + if (mlx5_lag_pf(ldev, i)->netdev == nh_dev0) break; } mlx5_lag_set_port_affinity(ldev, dev_idx); @@ -240,7 +240,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, /* nh added/removed */ if (event == FIB_EVENT_NH_DEL) { mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].netdev == fib_nh->fib_nh_dev) + if (mlx5_lag_pf(ldev, i)->netdev == fib_nh->fib_nh_dev) break; dev_idx++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 74d5c2ed14ff..5eea12a6887a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -16,7 +16,7 @@ static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev) int i; mlx5_ldev_for_each(i, 0, ldev) { - dev = ldev->pf[i].dev; + dev = mlx5_lag_pf(ldev, i)->dev; esw = dev->priv.eswitch; pf_metadata = ldev->lag_mpesw.pf_metadata[i]; if (!pf_metadata) @@ -37,7 +37,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) int i, err; mlx5_ldev_for_each(i, 0, ldev) { - dev = ldev->pf[i].dev; + dev = mlx5_lag_pf(ldev, i)->dev; esw = dev->priv.eswitch; pf_metadata = mlx5_esw_match_metadata_alloc(esw); if (!pf_metadata) { @@ -53,7 +53,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) } mlx5_ldev_for_each(i, 0, ldev) { - dev = ldev->pf[i].dev; + dev = mlx5_lag_pf(ldev, i)->dev; mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW, (void *)0); } @@ -67,9 +67,9 @@ err_metadata: static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev) { + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_core_dev *dev0; int err; - int idx; int i; if (ldev->mode == MLX5_LAG_MODE_MPESW) @@ -78,11 +78,10 @@ static int 
mlx5_lag_enable_mpesw(struct mlx5_lag *ldev) if (ldev->mode != MLX5_LAG_MODE_NONE) return -EINVAL; - idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); if (idx < 0) return -EINVAL; - dev0 = ldev->pf[idx].dev; + dev0 = mlx5_lag_pf(ldev, idx)->dev; if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || @@ -105,7 +104,7 @@ static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev) dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); mlx5_ldev_for_each(i, 0, ldev) { - err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + err = mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); if (err) goto err_rescan_drivers; } @@ -121,7 +120,7 @@ err_rescan_drivers: err_add_devices: mlx5_lag_add_devices(ldev); mlx5_ldev_for_each(i, 0, ldev) - mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); mlx5_mpesw_metadata_cleanup(ldev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index 16c7d16215c4..2a034b2a3eee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -50,7 +50,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, if (first_idx < 0) return -EINVAL; - dev = ldev->pf[first_idx].dev; + dev = mlx5_lag_pf(ldev, first_idx)->dev; ft_attr.max_fte = ldev->ports * ldev->buckets; ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER; @@ -84,8 +84,12 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, idx = i * ldev->buckets + j; affinity = ports[idx]; - dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev, - vhca_id); + /* affinity is 1-indexed device index, + * use reverse lookup. 
+ */ + dest.vport.vhca_id = + MLX5_CAP_GEN(mlx5_lag_pf_by_dev_idx(ldev, affinity - 1)->dev, + vhca_id); lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft, NULL, &flow_act, &dest, 1); @@ -307,7 +311,7 @@ mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, if (first_idx < 0) return ERR_PTR(-EINVAL); - dev = ldev->pf[first_idx].dev; + dev = mlx5_lag_pf(ldev, first_idx)->dev; lag_definer = kzalloc_obj(*lag_definer); if (!lag_definer) return ERR_PTR(-ENOMEM); @@ -356,8 +360,8 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, if (first_idx < 0) return; - dev = ldev->pf[first_idx].dev; - mlx5_ldev_for_each(i, first_idx, ldev) { + dev = mlx5_lag_pf(ldev, first_idx)->dev; + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; mlx5_del_flow_rules(lag_definer->rules[idx]); @@ -520,7 +524,7 @@ static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev) if (first_idx < 0) return -EINVAL; - dev = ldev->pf[first_idx].dev; + dev = mlx5_lag_pf(ldev, first_idx)->dev; mlx5_lag_set_outer_ttc_params(ldev, &ttc_params); port_sel->outer.ttc = mlx5_create_ttc_table(dev, &ttc_params); return PTR_ERR_OR_ZERO(port_sel->outer.ttc); @@ -536,7 +540,7 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) if (first_idx < 0) return -EINVAL; - dev = ldev->pf[first_idx].dev; + dev = mlx5_lag_pf(ldev, first_idx)->dev; mlx5_lag_set_inner_ttc_params(ldev, &ttc_params); port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params); return PTR_ERR_OR_ZERO(port_sel->inner.ttc); @@ -594,8 +598,12 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, if (ldev->v2p_map[idx] == ports[idx]) continue; - dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev, - vhca_id); + /* ports[] contains 1-indexed device indices, + * use reverse lookup. + */ + dest.vport.vhca_id = + MLX5_CAP_GEN(mlx5_lag_pf_by_dev_idx(ldev, ports[idx] - 1)->dev, + vhca_id); err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index 954942ad93c5..762c783156b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -107,7 +107,7 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses) /* Disconnect secondaries from the network */ if (!MLX5_CAP_GEN(dev, eswitch_manager)) return false; - if (!MLX5_CAP_GEN(dev, silent_mode)) + if (!MLX5_CAP_GEN(dev, silent_mode_set)) return false; /* RX steering from primary to secondaries */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1c35c3fc3bb3..dc7f20a357d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1779,6 +1779,7 @@ static const int types[] = { MLX5_CAP_CRYPTO, MLX5_CAP_SHAMPO, MLX5_CAP_ADV_RDMA, + MLX5_CAP_TLP_EMULATION, }; static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) |
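
Call sites that previously indexed per-peer state by `mlx5_get_dev_index()` now use the exported `mlx5_lag_get_dev_seq()`, which numbers the master 0 and the remaining ports sequentially from 1, keeping arrays such as `esw->offloads.peer_flows[]` densely indexed even when the underlying xarray slots are sparse. The caller-side pattern, with `peer_esw` and `flow` assumed in scope:

```c
/* pattern used by the en_tc.c hunks above; the context is assumed */
int seq = mlx5_lag_get_dev_seq(peer_esw->dev);

if (seq < 0)
	return;	/* device is not currently part of a LAG */

/* seq is stable: 0 == LAG master, 1..ports-1 == the other PFs */
mlx5e_tc_del_fdb_peer_flow(flow, seq);
```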
