diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2022-12-07 20:09:21 -0800 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2022-12-07 20:09:21 -0800 |
| commit | e1228581b38bd698d3502354c0807863c8b518f7 (patch) | |
| tree | 32af511225fd8fd00af5e913af48edd2ba66ca16 /include | |
| parent | 5955a948ac3d4e78857726d5e8d4bffb3fc71d4f (diff) | |
| parent | e5b9642a33be6f6a9ec2f035299f4a5c0980d7c0 (diff) | |
Merge branch 'devlink-add-port-function-attribute-to-enable-disable-roce-and-migratable'
Shay Drory says:
====================
devlink: Add port function attribute to enable/disable Roce and migratable
This series is a complete rewrite of the series "devlink: Add port
function attribute to enable/disable roce"
link:
https://lore.kernel.org/netdev/20221102163954.279266-1-danielj@nvidia.com/
Currently mlx5 PCI VF and SF are enabled by default for RoCE
functionality. And mlx5 PCI VF is disable by dafault for migratable
functionality.
Currently a user does not have the ability to disable RoCE for a PCI
VF/SF device before such device is enumerated by the driver.
User is also incapable to do such setting from smartnic scenario for a
VF from the smartnic.
Current 'enable_roce' device knob is limited to do setting only at
driverinit time. By this time device is already created and firmware has
already allocated necessary system memory for supporting RoCE.
Also, Currently a user does not have the ability to enable migratable
for a PCI VF.
The above are a hyper visor level control, to set the functionality of
devices passed through to guests.
This is achieved by extending existing 'port function' object to control
capabilities of a function. This enables users to control capability of
the device before enumeration.
Examples when user prefers to disable RoCE for a VF when using switchdev
mode:
$ devlink port show pci/0000:06:00.0/1
pci/0000:06:00.0/1: type eth netdev pf0vf0 flavour pcivf controller 0
pfnum 0 vfnum 0 external false splittable false
function:
hw_addr 00:00:00:00:00:00 roce enable
$ devlink port function set pci/0000:06:00.0/1 roce disable
$ devlink port show pci/0000:06:00.0/1
pci/0000:06:00.0/1: type eth netdev pf0vf0 flavour pcivf controller 0
pfnum 0 vfnum 0 external false splittable false
function:
hw_addr 00:00:00:00:00:00 roce disable
FAQs:
-----
1. What does roce enable/disable do?
Ans: It disables RoCE capability of the function before its enumerated,
so when driver reads the capability from the device firmware, it is
disabled.
At this point RDMA stack will not be able to create UD, QP1, RC, XRC
type of QPs. When RoCE is disabled, the GID table of all ports of the
device is disabled in the device and software stack.
2. How is the roce 'port function' option different from existing
devlink param?
Ans: RoCE attribute at the port function level disables the RoCE
capability at the specific function level; while enable_roce only does
at the software level.
3. Why is this option for disabling only RoCE and not the whole RDMA
device?
Ans: Because user still wants to use the RDMA device for non RoCE
commands in more memory efficient way.
====================
Link: https://lore.kernel.org/r/20221206185119.380138-1-shayd@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/mlx5/mlx5_ifc.h | 6 | ||||
| -rw-r--r-- | include/linux/mlx5/vport.h | 2 | ||||
| -rw-r--r-- | include/net/devlink.h | 39 | ||||
| -rw-r--r-- | include/uapi/linux/devlink.h | 13 |
4 files changed, 59 insertions, 1 deletions
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5a4e914e2a6f..2093131483c7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -68,6 +68,7 @@ enum { MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION = 0x25, }; @@ -1875,7 +1876,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { }; struct mlx5_ifc_cmd_hca_cap_2_bits { - u8 reserved_at_0[0xa0]; + u8 reserved_at_0[0x80]; + + u8 migratable[0x1]; + u8 reserved_at_81[0x1f]; u8 max_reformat_insert_size[0x8]; u8 max_reformat_insert_offset[0x8]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index aad53cb72f17..7f31432f44c2 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -132,4 +132,6 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, + u16 opmod); #endif /* __MLX5_VPORT_H__ */ diff --git a/include/net/devlink.h b/include/net/devlink.h index 5f6eca5e4a40..0f376a28b9c4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1452,6 +1452,45 @@ struct devlink_ops { const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); /** + * @port_fn_roce_get: Port function's roce get function. + * + * Query RoCE state of a function managed by the devlink port. + * Return -EOPNOTSUPP if port function RoCE handling is not supported. + */ + int (*port_fn_roce_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + /** + * @port_fn_roce_set: Port function's roce set function. + * + * Enable/Disable the RoCE state of a function managed by the devlink + * port. + * Return -EOPNOTSUPP if port function RoCE handling is not supported. + */ + int (*port_fn_roce_set)(struct devlink_port *devlink_port, + bool enable, struct netlink_ext_ack *extack); + /** + * @port_fn_migratable_get: Port function's migratable get function. + * + * Query migratable state of a function managed by the devlink port. + * Return -EOPNOTSUPP if port function migratable handling is not + * supported. + */ + int (*port_fn_migratable_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + /** + * @port_fn_migratable_set: Port function's migratable set function. + * + * Enable/Disable migratable state of a function managed by the devlink + * port. + * Return -EOPNOTSUPP if port function migratable handling is not + * supported. + */ + int (*port_fn_migratable_set)(struct devlink_port *devlink_port, + bool enable, + struct netlink_ext_ack *extack); + /** * port_new() - Add a new port function of a specified flavor * @devlink: Devlink instance * @attrs: attributes of the new port diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 70191d96af89..3782d4219ac9 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -658,11 +658,24 @@ enum devlink_resource_unit { DEVLINK_RESOURCE_UNIT_ENTRY, }; +enum devlink_port_fn_attr_cap { + DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT, + DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT, + + /* Add new caps above */ + __DEVLINK_PORT_FN_ATTR_CAPS_MAX, +}; + +#define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT) +#define DEVLINK_PORT_FN_CAP_MIGRATABLE \ + _BITUL(DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT) + enum devlink_port_function_attr { DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */ DEVLINK_PORT_FN_ATTR_STATE, /* u8 */ DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ + DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */ __DEVLINK_PORT_FUNCTION_ATTR_MAX, DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 |
