From 1ffeb2eb8be9936e9dc1f9af2d5f4c14d69a0d36 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:40 +0000 Subject: IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support 1. Introduce the basic SR-IOV paravirtualization context objects for multiplexing and demultiplexing MADs. 2. Introduce support for the new proxy and tunnel QP types. This patch introduces the objects required by the master for managing QP paravirtualization for guests. struct mlx4_ib_sriov is created by the master only. It is a container for the following: 1. All the info required by the PPF to multiplex and de-multiplex MADs (including those from the PF). (struct mlx4_ib_demux_ctx demux) 2. All the info required to manage alias GUIDs (i.e., the GUID at index 0 that each guest perceives. In fact, this is not the GUID which is actually at index 0, but is, in fact, the GUID which is at index[<VF number>] in the physical table). 3. structures which are used to manage CM paravirtualization 4. structures for managing the real special QPs when running in SR-IOV mode. The real SQPs are controlled by the PPF in this case. All SQPs created and controlled by the ib core layer are proxy SQPs. struct mlx4_ib_demux_ctx contains the information per port needed to manage paravirtualization: 1. All multicast paravirt info 2. All tunnel-qp paravirt info for the port. 3. GUID-table and GUID-prefix for the port 4. work queues. struct mlx4_ib_demux_pv_ctx contains all the info for managing the paravirtualized QPs for one slave/port. struct mlx4_ib_demux_pv_qp contains the info needed to run an individual QP (either tunnel qp or real SQP). Note: We made use of the 2 most significant bits in enum mlx4_ib_qp_flags (based on enum ib_qp_create_flags in ib_verbs.h). We need these bits in the low-level driver for internal purposes. 
Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 1 + include/linux/mlx4/qp.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6e1b0f973a03..07aa8232e631 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -389,6 +389,7 @@ struct mlx4_caps { enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1]; u32 max_counters; u8 port_ib_mtu[MLX4_MAX_PORTS + 1]; + u16 sqp_demux; }; struct mlx4_buf_list { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 338388ba260a..4b4ad6ffef92 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -126,7 +126,8 @@ struct mlx4_rss_context { struct mlx4_qp_path { u8 fl; - u8 reserved1[2]; + u8 reserved1[1]; + u8 disable_pkey_check; u8 pkey_index; u8 counter_index; u8 grh_mylmc; -- cgit v1.2.3 From e2c76824ca16a3e8443cc7b26abcb21af7c27b10 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:41 +0000 Subject: mlx4_core: Add proxy and tunnel QPs to the reserved QP area In addition, pass the proxy and tunnel QP numbers to slaves so the driver can perform special QP paravirtualization. 
Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 07aa8232e631..d5c82b7216de 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -693,7 +693,18 @@ static inline int mlx4_is_master(struct mlx4_dev *dev) static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { - return (qpn < dev->caps.sqp_start + 8); + return (qpn < dev->caps.base_sqpn + 8 + + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev)); +} + +static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn) +{ + int base = dev->caps.sqp_start + slave * 8; + + if (qpn >= base && qpn < base + 8) + return 1; + + return 0; } static inline int mlx4_is_mfunc(struct mlx4_dev *dev) -- cgit v1.2.3 From fc06573dfaf8a33bc0533bb70c49de13fa5232a4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:42 +0000 Subject: IB/mlx4: Initialize SR-IOV IB support for slaves in master context Allocate SR-IOV paravirtualization resources and MAD demuxing contexts on the master. This has two parts. The first part is to initialize the structures to contain the contexts. This is done at master startup time in mlx4_ib_init_sriov(). The second part is to actually create the tunneling resources required on the master to support a slave. This is performed when the master detects that a slave has started up (MLX4_DEV_EVENT_SLAVE_INIT event generated when a slave initializes its comm channel). For the master, there is no such startup event, so it creates its own tunneling resources when it starts up. In addition, the master also creates the real special QPs. The ib_core layer on the master causes creation of proxy special QPs, since the master is also paravirtualized at the ib_core layer. 
Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 3 ++- include/linux/mlx4/driver.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d5c82b7216de..b6b8d341b6c8 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -54,7 +54,8 @@ enum { }; enum { - MLX4_MAX_PORTS = 2 + MLX4_MAX_PORTS = 2, + MLX4_MAX_PORT_PKEYS = 128 }; /* base qkey for use in sriov tunnel-qp/proxy-qp communication. diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index d813704b963b..c257e1b211be 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -45,6 +45,8 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_PORT_DOWN, MLX4_DEV_EVENT_PORT_REINIT, MLX4_DEV_EVENT_PORT_MGMT_CHANGE, + MLX4_DEV_EVENT_SLAVE_INIT, + MLX4_DEV_EVENT_SLAVE_SHUTDOWN, }; struct mlx4_interface { -- cgit v1.2.3 From 54679e148287f0ca1bdd09264c908bacb9f19b3f Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:43 +0000 Subject: mlx4: Implement QP paravirtualization and maintain phys_pkey_cache for smp_snoop This requires: 1. Replacing the paravirtualized P_Key index (inserted by the guest) with the real P_Key index. 2. For UD QPs, placing the guest's true source GID index in the address path structure mgid field, and setting the ud_force_mgid bit so that the mgid is taken from the QP context and not from the WQE when posting sends. 3. For UC and RC QPs, placing the guest's true source GID index in the address path structure mgid field. 4. For tunnel and proxy QPs, setting the Q_Key value reserved for that proxy/tunnel pair. Since not all the above adjustments occur in all the QP transitions, the QP transitions require separate wrapper functions. 
Secondly, initialize the P_Key virtualization table to its default values: Master virtualized table is 1-1 with the real P_Key table, guest virtualized table has P_Key index 0 mapped to the real P_Key index 0, and all the other P_Key indices mapped to the reserved (invalid) P_Key at index 127. Finally, add logic in smp_snoop for maintaining the phys_P_Key_cache and generating events on the master only if a P_Key actually changed. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b6b8d341b6c8..9803fd5d3dba 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -940,6 +940,9 @@ int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); +void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, + int i, int val); + int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 993c401e207946fa56f69c51e39f015e7108497b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:48 +0000 Subject: mlx4_core: Add IB port-state machine and port mgmt event propagation For an IB port, a slave should not show port active until that slave has a valid alias-guid (provided by the subnet manager). Therefore the port-up event should be passed to a slave only after both the port is up, and the slave's alias-guid has been set. Also, provide the infrastructure for propagating port-management events (client-reregister, etc) to slaves. 
Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9803fd5d3dba..380e01671d1e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -192,6 +192,25 @@ enum { MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0, }; +enum slave_port_state { + SLAVE_PORT_DOWN = 0, + SLAVE_PENDING_UP, + SLAVE_PORT_UP, +}; + +enum slave_port_gen_event { + SLAVE_PORT_GEN_EVENT_DOWN = 0, + SLAVE_PORT_GEN_EVENT_UP, + SLAVE_PORT_GEN_EVENT_NONE, +}; + +enum slave_port_state_event { + MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, + MLX4_PORT_STATE_DEV_EVENT_PORT_UP, + MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID, + MLX4_PORT_STATE_IB_EVENT_GID_INVALID, +}; + enum { MLX4_PERM_LOCAL_READ = 1 << 10, MLX4_PERM_LOCAL_WRITE = 1 << 11, @@ -945,4 +964,13 @@ void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); +int mlx4_is_slave_active(struct mlx4_dev *dev, int slave); +int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port); +int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port); +int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr); +int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change); +enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port); +int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event); + + #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From efcd235d736ab05ef2b29d7fe1493a2f52b07b66 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:52 +0000 Subject: net/mlx4_core: Adjustments to SET_PORT for IB SR-IOV 1. Slaves may not set the IS_SM capability for the port. 2. 
DEV_MGMT may not be set in multifunction mode. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 380e01671d1e..b3f9b4500b9b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -53,6 +53,11 @@ enum { MLX4_FLAG_SRIOV = 1 << 4, }; +enum { + MLX4_PORT_CAP_IS_SM = 1 << 1, + MLX4_PORT_CAP_DEV_MGMT_SUP = 1 << 19, +}; + enum { MLX4_MAX_PORTS = 2, MLX4_MAX_PORT_PKEYS = 128 -- cgit v1.2.3 From 026149cbaada391d98f1cbec47c488cb548f753a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:55 +0000 Subject: mlx4: Activate SR-IOV mode for IB Remove the error returns for IB ports from mlx4_ib_add, mlx4_INIT_PORT_wrapper, and mlx4_CLOSE_PORT_wrapper. Currently, SRIOV is supported only for devices for which the link layer is IB on all ports; RoCE support will be added later. 
Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b3f9b4500b9b..9b243df789ef 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -697,6 +697,10 @@ struct mlx4_init_port_param { for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if ((type) == (dev)->caps.port_mask[(port)]) +#define mlx4_foreach_non_ib_transport_port(port, dev) \ + for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ + if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB)) + #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ -- cgit v1.2.3 From afa8fd1db9f295a0c4130bc6d87bf8b05bdd0523 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:56 +0000 Subject: mlx4: Paravirtualize Node Guids for slaves This is necessary in order to support > 1 VF/PF in a VM for software that uses the node guid as a discriminator, such as librdmacm. 
Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9b243df789ef..c0f8f74a0a5e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -981,5 +981,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port); int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event); +void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); +__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 47605df953985c2b792ac9f3ddf70d270b89adb8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:57 +0000 Subject: mlx4: Modify proxy/tunnel QP mechanism so that guests do no calculations Previously, the structure of a guest's proxy QPs followed the structure of the PPF special qps (qp0 port 1, qp0 port 2, qp1 port 1, qp1 port 2, ...). The guest then did offset calculations on the sqp_base qp number that the PPF passed to it in QUERY_FUNC_CAP(). This is now changed so that the guest does no offset calculations regarding proxy or tunnel QPs to use. This change frees the PPF from needing to adhere to a specific order in allocating proxy and tunnel QPs. Now QUERY_FUNC_CAP provides each port individually with its proxy qp0, proxy qp1, tunnel qp0, and tunnel qp1 QP numbers, and these are used directly where required (with no offset calculations). To accomplish this change, several fields were added to the phys_caps structure for use by the PPF and by non-SR-IOV mode: base_sqpn -- in non-sriov mode, this was formerly sqp_start. 
base_proxy_sqpn -- the first physical proxy qp number -- used by PPF base_tunnel_sqpn -- the first physical tunnel qp number -- used by PPF. The current code in the PPF still adheres to the previous layout of sqps, proxy-sqps and tunnel-sqps. However, the PPF can change this layout without affecting VF or (paravirtualized) PF code. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c0f8f74a0a5e..6d1acb04cd17 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -328,6 +328,9 @@ struct mlx4_phys_caps { u32 gid_phys_table_len[MLX4_MAX_PORTS + 1]; u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1]; u32 num_phys_eqs; + u32 base_sqpn; + u32 base_proxy_sqpn; + u32 base_tunnel_sqpn; }; struct mlx4_caps { @@ -358,9 +361,10 @@ struct mlx4_caps { int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; - int sqp_start; - u32 base_sqpn; - u32 base_tunnel_sqpn; + u32 *qp0_proxy; + u32 *qp1_proxy; + u32 *qp0_tunnel; + u32 *qp1_tunnel; int num_srqs; int max_srq_wqes; int max_srq_sge; @@ -722,15 +726,15 @@ static inline int mlx4_is_master(struct mlx4_dev *dev) static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { - return (qpn < dev->caps.base_sqpn + 8 + + return (qpn < dev->phys_caps.base_sqpn + 8 + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev)); } static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn) { - int base = dev->caps.sqp_start + slave * 8; + int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8; - if (qpn >= base && qpn < base + 8) + if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8) return 1; return 0; -- cgit v1.2.3