-rw-r--r--  include/uapi/linux/smc.h  |  17
-rw-r--r--  net/smc/af_smc.c          | 431
-rw-r--r--  net/smc/smc.h             |  20
-rw-r--r--  net/smc/smc_clc.c         | 147
-rw-r--r--  net/smc/smc_clc.h         |  55
-rw-r--r--  net/smc/smc_core.c        | 173
-rw-r--r--  net/smc/smc_core.h        |  50
-rw-r--r--  net/smc/smc_ib.c          | 160
-rw-r--r--  net/smc/smc_ib.h          |  16
-rw-r--r--  net/smc/smc_llc.c         | 623
-rw-r--r--  net/smc/smc_llc.h         |  12
-rw-r--r--  net/smc/smc_pnet.c        |  41
-rw-r--r--  net/smc/smc_wr.c          | 237
-rw-r--r--  net/smc/smc_wr.h          |   8
14 files changed, 1607 insertions, 383 deletions
diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index b175bd0165a1..20f33b27787f 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -84,17 +84,28 @@ enum {
SMC_NLA_SYS_IS_ISM_V2, /* u8 */
SMC_NLA_SYS_LOCAL_HOST, /* string */
SMC_NLA_SYS_SEID, /* string */
+ SMC_NLA_SYS_IS_SMCR_V2, /* u8 */
__SMC_NLA_SYS_MAX,
SMC_NLA_SYS_MAX = __SMC_NLA_SYS_MAX - 1
};
-/* SMC_NLA_LGR_V2 nested attributes */
+/* SMC_NLA_LGR_D_V2_COMMON and SMC_NLA_LGR_R_V2_COMMON nested attributes */
enum {
SMC_NLA_LGR_V2_VER, /* u8 */
SMC_NLA_LGR_V2_REL, /* u8 */
SMC_NLA_LGR_V2_OS, /* u8 */
SMC_NLA_LGR_V2_NEG_EID, /* string */
SMC_NLA_LGR_V2_PEER_HOST, /* string */
+ __SMC_NLA_LGR_V2_MAX,
+ SMC_NLA_LGR_V2_MAX = __SMC_NLA_LGR_V2_MAX - 1
+};
+
+/* SMC_NLA_LGR_R_V2 nested attributes */
+enum {
+ SMC_NLA_LGR_R_V2_UNSPEC,
+ SMC_NLA_LGR_R_V2_DIRECT, /* u8 */
+ __SMC_NLA_LGR_R_V2_MAX,
+ SMC_NLA_LGR_R_V2_MAX = __SMC_NLA_LGR_R_V2_MAX - 1
};
/* SMC_GEN_LGR_SMCR attributes */
@@ -106,6 +117,8 @@ enum {
SMC_NLA_LGR_R_PNETID, /* string */
SMC_NLA_LGR_R_VLAN_ID, /* u8 */
SMC_NLA_LGR_R_CONNS_NUM, /* u32 */
+ SMC_NLA_LGR_R_V2_COMMON, /* nest */
+ SMC_NLA_LGR_R_V2, /* nest */
__SMC_NLA_LGR_R_MAX,
SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
};
@@ -138,7 +151,7 @@ enum {
SMC_NLA_LGR_D_PNETID, /* string */
SMC_NLA_LGR_D_CHID, /* u16 */
SMC_NLA_LGR_D_PAD, /* flag */
- SMC_NLA_LGR_V2, /* nest */
+ SMC_NLA_LGR_D_V2_COMMON, /* nest */
__SMC_NLA_LGR_D_MAX,
SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1
};
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index f69ef3f2019f..5e50e007a7da 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -439,6 +439,47 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
return 0;
}
+static bool smc_isascii(char *hostname)
+{
+ int i;
+
+ for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
+ if (!isascii(hostname[i]))
+ return false;
+ return true;
+}
+
+static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
+ struct smc_clc_msg_accept_confirm *clc)
+{
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)clc;
+ struct smc_clc_first_contact_ext *fce;
+ int clc_v2_len;
+
+ if (clc->hdr.version == SMC_V1 ||
+ !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ return;
+
+ if (smc->conn.lgr->is_smcd) {
+ memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
+ SMC_MAX_EID_LEN);
+ clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
+ d1);
+ } else {
+ memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
+ SMC_MAX_EID_LEN);
+ clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
+ r1);
+ }
+ fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
+ smc->conn.lgr->peer_os = fce->os_type;
+ smc->conn.lgr->peer_smc_release = fce->release;
+ if (smc_isascii(fce->hostname))
+ memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
+ SMC_MAX_HOSTNAME_LEN);
+}
+
static void smcr_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
@@ -451,16 +492,6 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc,
smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}
-static bool smc_isascii(char *hostname)
-{
- int i;
-
- for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
- if (!isascii(hostname[i]))
- return false;
- return true;
-}
-
static void smcd_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
@@ -472,22 +503,6 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc,
smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
- if (clc->hdr.version > SMC_V1 &&
- (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) {
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)clc;
- struct smc_clc_first_contact_ext *fce =
- (struct smc_clc_first_contact_ext *)
- (((u8 *)clc_v2) + sizeof(*clc_v2));
-
- memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid,
- SMC_MAX_EID_LEN);
- smc->conn.lgr->peer_os = fce->os_type;
- smc->conn.lgr->peer_smc_release = fce->release;
- if (smc_isascii(fce->hostname))
- memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
- SMC_MAX_HOSTNAME_LEN);
- }
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -497,14 +512,16 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
smcd_conn_save_peer_info(smc, clc);
else
smcr_conn_save_peer_info(smc, clc);
+ smc_conn_save_peer_info_fce(smc, clc);
}
static void smc_link_save_peer_info(struct smc_link *link,
- struct smc_clc_msg_accept_confirm *clc)
+ struct smc_clc_msg_accept_confirm *clc,
+ struct smc_init_info *ini)
{
link->peer_qpn = ntoh24(clc->r0.qpn);
- memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE);
- memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac));
+ memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
+ memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
link->peer_psn = ntoh24(clc->r0.psn);
link->peer_mtu = clc->r0.qp_mtu;
}
@@ -608,7 +625,9 @@ static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
* used for the internal TCP socket
*/
smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
- if (!ini->ib_dev)
+ if (!ini->check_smcrv2 && !ini->ib_dev)
+ return SMC_CLC_DECL_NOSMCRDEV;
+ if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
return SMC_CLC_DECL_NOSMCRDEV;
return 0;
}
@@ -692,27 +711,42 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
int rc = 0;
/* check if there is an ism device available */
- if (ini->smcd_version & SMC_V1) {
- if (smc_find_ism_device(smc, ini) ||
- smc_connect_ism_vlan_setup(smc, ini)) {
- if (ini->smc_type_v1 == SMC_TYPE_B)
- ini->smc_type_v1 = SMC_TYPE_R;
- else
- ini->smc_type_v1 = SMC_TYPE_N;
- } /* else ISM V1 is supported for this connection */
- if (smc_find_rdma_device(smc, ini)) {
- if (ini->smc_type_v1 == SMC_TYPE_B)
- ini->smc_type_v1 = SMC_TYPE_D;
- else
- ini->smc_type_v1 = SMC_TYPE_N;
- } /* else RDMA is supported for this connection */
- }
- if (smc_ism_is_v2_capable() && smc_find_ism_v2_device_clnt(smc, ini))
- ini->smc_type_v2 = SMC_TYPE_N;
+ if (!(ini->smcd_version & SMC_V1) ||
+ smc_find_ism_device(smc, ini) ||
+ smc_connect_ism_vlan_setup(smc, ini))
+ ini->smcd_version &= ~SMC_V1;
+ /* else ISM V1 is supported for this connection */
+
+ /* check if there is an rdma device available */
+ if (!(ini->smcr_version & SMC_V1) ||
+ smc_find_rdma_device(smc, ini))
+ ini->smcr_version &= ~SMC_V1;
+ /* else RDMA is supported for this connection */
+
+ ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
+ ini->smcr_version & SMC_V1);
+
+ /* check if there is an ism v2 device available */
+ if (!(ini->smcd_version & SMC_V2) ||
+ !smc_ism_is_v2_capable() ||
+ smc_find_ism_v2_device_clnt(smc, ini))
+ ini->smcd_version &= ~SMC_V2;
+
+ /* check if there is an rdma v2 device available */
+ ini->check_smcrv2 = true;
+ ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
+ if (!(ini->smcr_version & SMC_V2) ||
+ smc->clcsock->sk->sk_family != AF_INET ||
+ !smc_clc_ueid_count() ||
+ smc_find_rdma_device(smc, ini))
+ ini->smcr_version &= ~SMC_V2;
+ ini->check_smcrv2 = false;
+
+ ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
+ ini->smcr_version & SMC_V2);
/* if neither ISM nor RDMA are supported, fallback */
- if (!smcr_indicated(ini->smc_type_v1) &&
- ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
+ if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
rc = SMC_CLC_DECL_NOSMCDEV;
return rc;
@@ -752,6 +786,64 @@ static int smc_connect_clc(struct smc_sock *smc,
SMC_CLC_ACCEPT, CLC_WAIT_TIME);
}
+void smc_fill_gid_list(struct smc_link_group *lgr,
+ struct smc_gidlist *gidlist,
+ struct smc_ib_device *known_dev, u8 *known_gid)
+{
+ struct smc_init_info *alt_ini = NULL;
+
+ memset(gidlist, 0, sizeof(*gidlist));
+ memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE);
+
+ alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL);
+ if (!alt_ini)
+ goto out;
+
+ alt_ini->vlan_id = lgr->vlan_id;
+ alt_ini->check_smcrv2 = true;
+ alt_ini->smcrv2.saddr = lgr->saddr;
+ smc_pnet_find_alt_roce(lgr, alt_ini, known_dev);
+
+ if (!alt_ini->smcrv2.ib_dev_v2)
+ goto out;
+
+ memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2,
+ SMC_GID_SIZE);
+
+out:
+ kfree(alt_ini);
+}
+
+static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
+ struct smc_clc_msg_accept_confirm *aclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)aclc;
+ struct smc_clc_first_contact_ext *fce =
+ (struct smc_clc_first_contact_ext *)
+ (((u8 *)clc_v2) + sizeof(*clc_v2));
+
+ if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
+ return 0;
+
+ if (fce->v2_direct) {
+ memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
+ ini->smcrv2.uses_gateway = false;
+ } else {
+ if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
+ smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
+ ini->smcrv2.nexthop_mac,
+ &ini->smcrv2.uses_gateway))
+ return SMC_CLC_DECL_NOROUTE;
+ if (!ini->smcrv2.uses_gateway) {
+ /* mismatch: peer claims indirect, but it's direct */
+ return SMC_CLC_DECL_NOINDIRECT;
+ }
+ }
+ return 0;
+}
+
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
@@ -759,11 +851,18 @@ static int smc_connect_rdma(struct smc_sock *smc,
{
int i, reason_code = 0;
struct smc_link *link;
+ u8 *eid = NULL;
ini->is_smcd = false;
- ini->ib_lcl = &aclc->r0.lcl;
ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
+ memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
+ memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
+ memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
+
+ reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
+ if (reason_code)
+ return reason_code;
mutex_lock(&smc_client_lgr_pending);
reason_code = smc_conn_create(smc, ini);
@@ -785,8 +884,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
!memcmp(l->peer_gid, &aclc->r0.lcl.gid,
SMC_GID_SIZE) &&
- !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
- sizeof(l->peer_mac))) {
+ (aclc->hdr.version > SMC_V1 ||
+ !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
+ sizeof(l->peer_mac)))) {
link = l;
break;
}
@@ -805,7 +905,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
}
if (ini->first_contact_local)
- smc_link_save_peer_info(link, aclc);
+ smc_link_save_peer_info(link, aclc, ini);
if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
reason_code = SMC_CLC_DECL_ERR_RTOK;
@@ -828,8 +928,18 @@ static int smc_connect_rdma(struct smc_sock *smc,
}
smc_rmb_sync_sg_for_device(&smc->conn);
+ if (aclc->hdr.version > SMC_V1) {
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)aclc;
+
+ eid = clc_v2->r1.eid;
+ if (ini->first_contact_local)
+ smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
+ link->smcibdev, link->gid);
+ }
+
reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
- SMC_V1, NULL);
+ aclc->hdr.version, eid, ini);
if (reason_code)
goto connect_abort;
@@ -869,7 +979,7 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
int i;
for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
- if (ini->ism_chid[i] == ntohs(aclc->chid)) {
+ if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
ini->ism_selected = i;
return 0;
}
@@ -923,11 +1033,11 @@ static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
- eid = clc_v2->eid;
+ eid = clc_v2->d1.eid;
}
rc = smc_clc_send_confirm(smc, ini->first_contact_local,
- aclc->hdr.version, eid);
+ aclc->hdr.version, eid, NULL);
if (rc)
goto connect_abort;
mutex_unlock(&smc_server_lgr_pending);
@@ -950,17 +1060,24 @@ connect_abort:
static int smc_connect_check_aclc(struct smc_init_info *ini,
struct smc_clc_msg_accept_confirm *aclc)
{
- if ((aclc->hdr.typev1 == SMC_TYPE_R &&
- !smcr_indicated(ini->smc_type_v1)) ||
- (aclc->hdr.typev1 == SMC_TYPE_D &&
- ((!smcd_indicated(ini->smc_type_v1) &&
- !smcd_indicated(ini->smc_type_v2)) ||
- (aclc->hdr.version == SMC_V1 &&
- !smcd_indicated(ini->smc_type_v1)) ||
- (aclc->hdr.version == SMC_V2 &&
- !smcd_indicated(ini->smc_type_v2)))))
+ if (aclc->hdr.typev1 != SMC_TYPE_R &&
+ aclc->hdr.typev1 != SMC_TYPE_D)
return SMC_CLC_DECL_MODEUNSUPP;
+ if (aclc->hdr.version >= SMC_V2) {
+ if ((aclc->hdr.typev1 == SMC_TYPE_R &&
+ !smcr_indicated(ini->smc_type_v2)) ||
+ (aclc->hdr.typev1 == SMC_TYPE_D &&
+ !smcd_indicated(ini->smc_type_v2)))
+ return SMC_CLC_DECL_MODEUNSUPP;
+ } else {
+ if ((aclc->hdr.typev1 == SMC_TYPE_R &&
+ !smcr_indicated(ini->smc_type_v1)) ||
+ (aclc->hdr.typev1 == SMC_TYPE_D &&
+ !smcd_indicated(ini->smc_type_v1)))
+ return SMC_CLC_DECL_MODEUNSUPP;
+ }
+
return 0;
}
@@ -991,14 +1108,15 @@ static int __smc_connect(struct smc_sock *smc)
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
version);
- ini->smcd_version = SMC_V1;
- ini->smcd_version |= smc_ism_is_v2_capable() ? SMC_V2 : 0;
+ ini->smcd_version = SMC_V1 | SMC_V2;
+ ini->smcr_version = SMC_V1 | SMC_V2;
ini->smc_type_v1 = SMC_TYPE_B;
- ini->smc_type_v2 = smc_ism_is_v2_capable() ? SMC_TYPE_D : SMC_TYPE_N;
+ ini->smc_type_v2 = SMC_TYPE_B;
/* get vlan id from IP device */
if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
ini->smcd_version &= ~SMC_V1;
+ ini->smcr_version = 0;
ini->smc_type_v1 = SMC_TYPE_N;
if (!ini->smcd_version) {
rc = SMC_CLC_DECL_GETVLANERR;
@@ -1026,15 +1144,17 @@ static int __smc_connect(struct smc_sock *smc)
/* check if smc modes and versions of CLC proposal and accept match */
rc = smc_connect_check_aclc(ini, aclc);
version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
- ini->smcd_version = version;
if (rc)
goto vlan_cleanup;
/* depending on previous steps, connect using rdma or ism */
- if (aclc->hdr.typev1 == SMC_TYPE_R)
+ if (aclc->hdr.typev1 == SMC_TYPE_R) {
+ ini->smcr_version = version;
rc = smc_connect_rdma(smc, aclc, ini);
- else if (aclc->hdr.typev1 == SMC_TYPE_D)
+ } else if (aclc->hdr.typev1 == SMC_TYPE_D) {
+ ini->smcd_version = version;
rc = smc_connect_ism(smc, aclc, ini);
+ }
if (rc)
goto vlan_cleanup;
@@ -1315,7 +1435,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
/* initial contact - try to establish second link */
- smc_llc_srv_add_link(link);
+ smc_llc_srv_add_link(link, NULL);
return 0;
}
@@ -1395,33 +1515,48 @@ static int smc_listen_v2_check(struct smc_sock *new_smc,
ini->smc_type_v1 = pclc->hdr.typev1;
ini->smc_type_v2 = pclc->hdr.typev2;
- ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0;
- if (pclc->hdr.version > SMC_V1)
- ini->smcd_version |=
- ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0;
- if (!(ini->smcd_version & SMC_V2)) {
+ ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
+ ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
+ if (pclc->hdr.version > SMC_V1) {
+ if (smcd_indicated(ini->smc_type_v2))
+ ini->smcd_version |= SMC_V2;
+ if (smcr_indicated(ini->smc_type_v2))
+ ini->smcr_version |= SMC_V2;
+ }
+ if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) {
rc = SMC_CLC_DECL_PEERNOSMC;
goto out;
}
- if (!smc_ism_is_v2_capable()) {
- ini->smcd_version &= ~SMC_V2;
- rc = SMC_CLC_DECL_NOISM2SUPP;
- goto out;
- }
pclc_v2_ext = smc_get_clc_v2_ext(pclc);
if (!pclc_v2_ext) {
ini->smcd_version &= ~SMC_V2;
+ ini->smcr_version &= ~SMC_V2;
rc = SMC_CLC_DECL_NOV2EXT;
goto out;
}
pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
- if (!pclc_smcd_v2_ext) {
- ini->smcd_version &= ~SMC_V2;
- rc = SMC_CLC_DECL_NOV2DEXT;
+ if (ini->smcd_version & SMC_V2) {
+ if (!smc_ism_is_v2_capable()) {
+ ini->smcd_version &= ~SMC_V2;
+ rc = SMC_CLC_DECL_NOISM2SUPP;
+ } else if (!pclc_smcd_v2_ext) {
+ ini->smcd_version &= ~SMC_V2;
+ rc = SMC_CLC_DECL_NOV2DEXT;
+ } else if (!pclc_v2_ext->hdr.eid_cnt &&
+ !pclc_v2_ext->hdr.flag.seid) {
+ ini->smcd_version &= ~SMC_V2;
+ rc = SMC_CLC_DECL_NOUEID;
+ }
+ }
+ if (ini->smcr_version & SMC_V2) {
+ if (!pclc_v2_ext->hdr.eid_cnt) {
+ ini->smcr_version &= ~SMC_V2;
+ rc = SMC_CLC_DECL_NOUEID;
+ }
}
out:
- if (!ini->smcd_version)
+ if (!ini->smcd_version && !ini->smcr_version)
return rc;
return 0;
@@ -1541,10 +1676,6 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
pclc_smcd = smc_get_clc_msg_smcd(pclc);
smc_v2_ext = smc_get_clc_v2_ext(pclc);
smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
- if (!smcd_v2_ext) {
- smc_find_ism_store_rc(SMC_CLC_DECL_NOV2DEXT, ini);
- goto not_found;
- }
mutex_lock(&smcd_dev_list.mutex);
if (pclc_smcd->ism.chid)
@@ -1562,8 +1693,10 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
}
mutex_unlock(&smcd_dev_list.mutex);
- if (!ini->ism_dev[0])
+ if (!ini->ism_dev[0]) {
+ smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
goto not_found;
+ }
smc_ism_get_system_eid(&eid);
if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
@@ -1616,6 +1749,7 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
not_found:
smc_find_ism_store_rc(rc, ini);
+ ini->smcd_version &= ~SMC_V1;
ini->ism_dev[0] = NULL;
ini->is_smcd = false;
}
@@ -1634,24 +1768,69 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
return 0;
}
+static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
+ struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_v2_extension *smc_v2_ext;
+ u8 smcr_version;
+ int rc;
+
+ if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2))
+ goto not_found;
+
+ smc_v2_ext = smc_get_clc_v2_ext(pclc);
+ if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
+ goto not_found;
+
+ /* prepare RDMA check */
+ memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
+ memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE);
+ memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
+ ini->check_smcrv2 = true;
+ ini->smcrv2.clc_sk = new_smc->clcsock->sk;
+ ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
+ ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
+ rc = smc_find_rdma_device(new_smc, ini);
+ if (rc) {
+ smc_find_ism_store_rc(rc, ini);
+ goto not_found;
+ }
+ if (!ini->smcrv2.uses_gateway)
+ memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);
+
+ smcr_version = ini->smcr_version;
+ ini->smcr_version = SMC_V2;
+ rc = smc_listen_rdma_init(new_smc, ini);
+ if (!rc)
+ rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
+ if (!rc)
+ return;
+ ini->smcr_version = smcr_version;
+ smc_find_ism_store_rc(rc, ini);
+
+not_found:
+ ini->smcr_version &= ~SMC_V2;
+ ini->check_smcrv2 = false;
+}
+
static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
int rc;
- if (!smcr_indicated(ini->smc_type_v1))
+ if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1))
return SMC_CLC_DECL_NOSMCDEV;
/* prepare RDMA check */
- ini->ib_lcl = &pclc->lcl;
+ memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
+ memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE);
+ memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
rc = smc_find_rdma_device(new_smc, ini);
if (rc) {
/* no RDMA device found */
- if (ini->smc_type_v1 == SMC_TYPE_B)
- /* neither ISM nor RDMA device found */
- rc = SMC_CLC_DECL_NOSMCDEV;
- return rc;
+ return SMC_CLC_DECL_NOSMCDEV;
}
rc = smc_listen_rdma_init(new_smc, ini);
if (rc)
@@ -1664,51 +1843,60 @@ static int smc_listen_find_device(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
- int rc;
+ int prfx_rc;
/* check for ISM device matching V2 proposed device */
smc_find_ism_v2_device_serv(new_smc, pclc, ini);
if (ini->ism_dev[0])
return 0;
- if (!(ini->smcd_version & SMC_V1))
- return ini->rc ?: SMC_CLC_DECL_NOSMCD2DEV;
-
- /* check for matching IP prefix and subnet length */
- rc = smc_listen_prfx_check(new_smc, pclc);
- if (rc)
- return ini->rc ?: rc;
+ /* check for matching IP prefix and subnet length (V1) */
+ prfx_rc = smc_listen_prfx_check(new_smc, pclc);
+ if (prfx_rc)
+ smc_find_ism_store_rc(prfx_rc, ini);
/* get vlan id from IP device */
if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
return ini->rc ?: SMC_CLC_DECL_GETVLANERR;
/* check for ISM device matching V1 proposed device */
- smc_find_ism_v1_device_serv(new_smc, pclc, ini);
+ if (!prfx_rc)
+ smc_find_ism_v1_device_serv(new_smc, pclc, ini);
if (ini->ism_dev[0])
return 0;
- if (pclc->hdr.typev1 == SMC_TYPE_D)
+ if (!smcr_indicated(pclc->hdr.typev1) &&
+ !smcr_indicated(pclc->hdr.typev2))
/* skip RDMA and decline */
return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;
- /* check if RDMA is available */
- rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
- smc_find_ism_store_rc(rc, ini);
+ /* check if RDMA V2 is available */
+ smc_find_rdma_v2_device_serv(new_smc, pclc, ini);
+ if (ini->smcrv2.ib_dev_v2)
+ return 0;
- return (!rc) ? 0 : ini->rc;
+ /* check if RDMA V1 is available */
+ if (!prfx_rc) {
+ int rc;
+
+ rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
+ smc_find_ism_store_rc(rc, ini);
+ return (!rc) ? 0 : ini->rc;
+ }
+ return SMC_CLC_DECL_NOSMCDEV;
}
/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
struct smc_clc_msg_accept_confirm *cclc,
- bool local_first)
+ bool local_first,
+ struct smc_init_info *ini)
{
struct smc_link *link = new_smc->conn.lnk;
int reason_code = 0;
if (local_first)
- smc_link_save_peer_info(link, cclc);
+ smc_link_save_peer_info(link, cclc, ini);
if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
return SMC_CLC_DECL_ERR_RTOK;
@@ -1729,12 +1917,13 @@ static void smc_listen_work(struct work_struct *work)
{
struct smc_sock *new_smc = container_of(work, struct smc_sock,
smc_listen_work);
- u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
struct socket *newclcsock = new_smc->clcsock;
struct smc_clc_msg_accept_confirm *cclc;
struct smc_clc_msg_proposal_area *buf;
struct smc_clc_msg_proposal *pclc;
struct smc_init_info *ini = NULL;
+ u8 proposal_version = SMC_V1;
+ u8 accept_version;
int rc = 0;
if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
@@ -1765,7 +1954,9 @@ static void smc_listen_work(struct work_struct *work)
SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
if (rc)
goto out_decl;
- version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version;
+
+ if (pclc->hdr.version > SMC_V1)
+ proposal_version = SMC_V2;
/* IPSec connections opt out of SMC optimizations */
if (using_ipsec(new_smc)) {
@@ -1795,9 +1986,9 @@ static void smc_listen_work(struct work_struct *work)
goto out_unlock;
/* send SMC Accept CLC message */
+ accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
- ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1,
- ini->negotiated_eid);
+ accept_version, ini->negotiated_eid);
if (rc)
goto out_unlock;
@@ -1819,7 +2010,7 @@ static void smc_listen_work(struct work_struct *work)
/* finish worker */
if (!ini->is_smcd) {
rc = smc_listen_rdma_finish(new_smc, cclc,
- ini->first_contact_local);
+ ini->first_contact_local, ini);
if (rc)
goto out_unlock;
mutex_unlock(&smc_server_lgr_pending);
@@ -1833,7 +2024,7 @@ out_unlock:
mutex_unlock(&smc_server_lgr_pending);
out_decl:
smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
- version);
+ proposal_version);
out_free:
kfree(ini);
kfree(buf);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 5e7def3ab730..f4286ca1f228 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -56,7 +56,20 @@ enum smc_state { /* possible states of an SMC socket */
struct smc_link_group;
struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */
- u8 type;
+ union {
+ u8 type;
+#if defined(__BIG_ENDIAN_BITFIELD)
+ struct {
+ u8 llc_version:4,
+ llc_type:4;
+ };
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ struct {
+ u8 llc_type:4,
+ llc_version:4;
+ };
+#endif
+ };
} __aligned(1);
struct smc_cdc_conn_state_flags {
@@ -286,7 +299,12 @@ static inline bool using_ipsec(struct smc_sock *smc)
}
#endif
+struct smc_gidlist;
+
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
void smc_close_non_accepted(struct sock *sk);
+void smc_fill_gid_list(struct smc_link_group *lgr,
+ struct smc_gidlist *gidlist,
+ struct smc_ib_device *known_dev, u8 *known_gid);
#endif /* __SMC_H */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 1cc8a76b39f9..8409ab71a5e4 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -31,6 +31,7 @@
#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
#define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78
+#define SMCR_CLC_ACCEPT_CONFIRM_LEN_V2 108
#define SMC_CLC_RECV_BUF_LEN 100
/* eye catcher "SMCR" EBCDIC for CLC messages */
@@ -114,6 +115,17 @@ err_out:
return rc;
}
+int smc_clc_ueid_count(void)
+{
+ int count;
+
+ read_lock(&smc_clc_eid_table.lock);
+ count = smc_clc_eid_table.ueid_cnt;
+ read_unlock(&smc_clc_eid_table.lock);
+
+ return count;
+}
+
int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY];
@@ -298,7 +310,8 @@ bool smc_clc_match_eid(u8 *negotiated_eid,
negotiated_eid[0] = 0;
read_lock(&smc_clc_eid_table.lock);
- if (smc_clc_eid_table.seid_enabled &&
+ if (peer_eid && local_eid &&
+ smc_clc_eid_table.seid_enabled &&
smc_v2_ext->hdr.flag.seid &&
!memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) {
memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN);
@@ -380,6 +393,27 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
(ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
sizeof(struct smc_clc_first_contact_ext)))
return false;
+ if (hdr->typev1 == SMC_TYPE_R &&
+ ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2)
+ return false;
+ }
+ return true;
+}
+
+/* check arriving CLC decline */
+static bool
+smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc)
+{
+ struct smc_clc_msg_hdr *hdr = &dclc->hdr;
+
+ if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
+ return false;
+ if (hdr->version == SMC_V1) {
+ if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline))
+ return false;
+ } else {
+ if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline_v2))
+ return false;
}
return true;
}
@@ -425,9 +459,9 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
break;
case SMC_CLC_DECLINE:
dclc = (struct smc_clc_msg_decline *)clcm;
- if (ntohs(dclc->hdr.length) != sizeof(*dclc))
+ if (!smc_clc_msg_decl_valid(dclc))
return false;
- trl = &dclc->trl;
+ check_trl = false;
break;
default:
return false;
@@ -726,15 +760,16 @@ out:
/* send CLC DECLINE message across internal TCP socket */
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
{
- struct smc_clc_msg_decline dclc;
+ struct smc_clc_msg_decline *dclc_v1;
+ struct smc_clc_msg_decline_v2 dclc;
struct msghdr msg;
+ int len, send_len;
struct kvec vec;
- int len;
+ dclc_v1 = (struct smc_clc_msg_decline *)&dclc;
memset(&dclc, 0, sizeof(dclc));
memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
dclc.hdr.type = SMC_CLC_DECLINE;
- dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
dclc.hdr.version = version;
dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
@@ -744,14 +779,22 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
memcpy(dclc.id_for_peer, local_systemid,
sizeof(local_systemid));
dclc.peer_diagnosis = htonl(peer_diag_info);
- memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+ if (version == SMC_V1) {
+ memcpy(dclc_v1->trl.eyecatcher, SMC_EYECATCHER,
+ sizeof(SMC_EYECATCHER));
+ send_len = sizeof(*dclc_v1);
+ } else {
+ memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER,
+ sizeof(SMC_EYECATCHER));
+ send_len = sizeof(dclc);
+ }
+ dclc.hdr.length = htons(send_len);
memset(&msg, 0, sizeof(msg));
vec.iov_base = &dclc;
- vec.iov_len = sizeof(struct smc_clc_msg_decline);
- len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
- sizeof(struct smc_clc_msg_decline));
- if (len < 0 || len < sizeof(struct smc_clc_msg_decline))
+ vec.iov_len = send_len;
+ len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, send_len);
+ if (len < 0 || len < send_len)
len = -EPROTO;
return len > 0 ? 0 : len;
}
@@ -833,8 +876,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
} else {
struct smc_clc_eid_entry *ueident;
u16 v2_ext_offset;
- u8 *eid = NULL;
+ v2_ext->hdr.flag.release = SMC_RELEASE;
v2_ext_offset = sizeof(*pclc_smcd) -
offsetofend(struct smc_clc_msg_smcd, v2_ext_offset);
if (ini->smc_type_v1 != SMC_TYPE_N)
@@ -842,6 +885,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
pclc_prfx->ipv6_prefixes_cnt *
sizeof(ipv6_prfx[0]);
pclc_smcd->v2_ext_offset = htons(v2_ext_offset);
+ plen += sizeof(*v2_ext);
read_lock(&smc_clc_eid_table.lock);
v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt;
@@ -851,10 +895,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
memcpy(v2_ext->user_eids[i++], ueident->eid,
sizeof(ueident->eid));
}
- v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled;
read_unlock(&smc_clc_eid_table.lock);
+ }
+ if (smcd_indicated(ini->smc_type_v2)) {
+ u8 *eid = NULL;
+
+ v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled;
v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt;
- v2_ext->hdr.flag.release = SMC_RELEASE;
v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) -
offsetofend(struct smc_clnt_opts_area_hdr,
smcd_v2_ext_offset) +
@@ -862,7 +909,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
smc_ism_get_system_eid(&eid);
if (eid && v2_ext->hdr.flag.seid)
memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN);
- plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext);
+ plen += sizeof(*smcd_v2_ext);
if (ini->ism_offered_cnt) {
for (i = 1; i <= ini->ism_offered_cnt; i++) {
gidchids[i - 1].gid =
@@ -874,6 +921,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
sizeof(struct smc_clc_smcd_gid_chid);
}
}
+ if (smcr_indicated(ini->smc_type_v2))
+ memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
+
pclc_base->hdr.length = htons(plen);
memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -897,12 +947,14 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
vec[i].iov_base = v2_ext;
vec[i++].iov_len = sizeof(*v2_ext) +
(v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
- vec[i].iov_base = smcd_v2_ext;
- vec[i++].iov_len = sizeof(*smcd_v2_ext);
- if (ini->ism_offered_cnt) {
- vec[i].iov_base = gidchids;
- vec[i++].iov_len = ini->ism_offered_cnt *
+ if (smcd_indicated(ini->smc_type_v2)) {
+ vec[i].iov_base = smcd_v2_ext;
+ vec[i++].iov_len = sizeof(*smcd_v2_ext);
+ if (ini->ism_offered_cnt) {
+ vec[i].iov_base = gidchids;
+ vec[i++].iov_len = ini->ism_offered_cnt *
sizeof(struct smc_clc_smcd_gid_chid);
+ }
}
}
vec[i].iov_base = trl;
@@ -925,13 +977,14 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
static int smc_clc_send_confirm_accept(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *clc_v2,
int first_contact, u8 version,
- u8 *eid)
+ u8 *eid, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
struct smc_clc_msg_accept_confirm *clc;
struct smc_clc_first_contact_ext fce;
+ struct smc_clc_fce_gid_ext gle;
struct smc_clc_msg_trail trl;
- struct kvec vec[3];
+ struct kvec vec[5];
struct msghdr msg;
int i, len;
@@ -953,9 +1006,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
if (version == SMC_V1) {
clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
} else {
- clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd));
- if (eid[0])
- memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN);
+ clc_v2->d1.chid =
+ htons(smc_ism_get_chid(conn->lgr->smcd));
+ if (eid && eid[0])
+ memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN);
len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact)
smc_clc_fill_fce(&fce, &len);
@@ -994,6 +1048,26 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(clc->r0.psn, link->psn_initial);
+ if (version == SMC_V1) {
+ clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
+ } else {
+ if (eid && eid[0])
+ memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN);
+ len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
+ if (first_contact) {
+ smc_clc_fill_fce(&fce, &len);
+ fce.v2_direct = !link->lgr->uses_gateway;
+ memset(&gle, 0, sizeof(gle));
+ if (ini && clc->hdr.type == SMC_CLC_CONFIRM) {
+ gle.gid_cnt = ini->smcrv2.gidlist.len;
+ len += sizeof(gle);
+ len += gle.gid_cnt * sizeof(gle.gid[0]);
+ } else {
+ len += sizeof(gle.reserved);
+ }
+ }
+ clc_v2->hdr.length = htons(len);
+ }
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
}
@@ -1001,7 +1075,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
i = 0;
vec[i].iov_base = clc_v2;
if (version > SMC_V1)
- vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl);
+ vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
+ SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 :
+ SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) -
+ sizeof(trl);
else
vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
SMCD_CLC_ACCEPT_CONFIRM_LEN :
@@ -1010,6 +1087,18 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
if (version > SMC_V1 && first_contact) {
vec[i].iov_base = &fce;
vec[i++].iov_len = sizeof(fce);
+ if (!conn->lgr->is_smcd) {
+ if (clc->hdr.type == SMC_CLC_CONFIRM) {
+ vec[i].iov_base = &gle;
+ vec[i++].iov_len = sizeof(gle);
+ vec[i].iov_base = &ini->smcrv2.gidlist.list;
+ vec[i++].iov_len = gle.gid_cnt *
+ sizeof(gle.gid[0]);
+ } else {
+ vec[i].iov_base = &gle.reserved;
+ vec[i++].iov_len = sizeof(gle.reserved);
+ }
+ }
}
vec[i].iov_base = &trl;
vec[i++].iov_len = sizeof(trl);
@@ -1019,7 +1108,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
/* send CLC CONFIRM message across internal TCP socket */
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
- u8 version, u8 *eid)
+ u8 version, u8 *eid, struct smc_init_info *ini)
{
struct smc_clc_msg_accept_confirm_v2 cclc_v2;
int reason_code = 0;
@@ -1029,7 +1118,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
memset(&cclc_v2, 0, sizeof(cclc_v2));
cclc_v2.hdr.type = SMC_CLC_CONFIRM;
len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact,
- version, eid);
+ version, eid, ini);
if (len < ntohs(cclc_v2.hdr.length)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
@@ -1052,7 +1141,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
memset(&aclc_v2, 0, sizeof(aclc_v2));
aclc_v2.hdr.type = SMC_CLC_ACCEPT;
len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
- version, negotiated_eid);
+ version, negotiated_eid, NULL);
if (len < ntohs(aclc_v2.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 974d01d16bb5..83f02f131fc0 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -44,6 +44,7 @@
#define SMC_CLC_DECL_NOV2DEXT 0x03030005 /* peer sent no clc SMC-Dv2 ext. */
#define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */
#define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */
+#define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
@@ -54,6 +55,8 @@
#define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */
#define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */
#define SMC_CLC_DECL_MAX_DMB 0x030d0000 /* SMC-D DMB limit exceeded */
+#define SMC_CLC_DECL_NOROUTE 0x030e0000 /* SMC-Rv2 conn. no route to peer */
+#define SMC_CLC_DECL_NOINDIRECT 0x030f0000 /* SMC-Rv2 conn. indirect mismatch*/
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
@@ -213,11 +216,14 @@ struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */
#define SMC_CLC_OS_AIX 3
struct smc_clc_first_contact_ext {
- u8 reserved1;
#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 v2_direct : 1,
+ reserved : 7;
u8 os_type : 4,
release : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 reserved : 7,
+ v2_direct : 1;
u8 release : 4,
os_type : 4;
#endif
@@ -225,6 +231,13 @@ struct smc_clc_first_contact_ext {
u8 hostname[SMC_MAX_HOSTNAME_LEN];
};
+struct smc_clc_fce_gid_ext {
+ u8 reserved[16];
+ u8 gid_cnt;
+ u8 reserved2[3];
+ u8 gid[][SMC_GID_SIZE];
+};
+
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
@@ -239,13 +252,17 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
- struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
+ struct { /* SMC-R */
+ struct smcr_clc_msg_accept_confirm r0;
+ u8 eid[SMC_MAX_EID_LEN];
+ u8 reserved6[8];
+ } r1;
struct { /* SMC-D */
struct smcd_clc_msg_accept_confirm_common d0;
__be16 chid;
u8 eid[SMC_MAX_EID_LEN];
u8 reserved5[8];
- };
+ } d1;
};
};
@@ -264,6 +281,24 @@ struct smc_clc_msg_decline { /* clc decline message */
struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
} __aligned(4);
+#define SMC_DECL_DIAG_COUNT_V2 4 /* no. of additional peer diagnosis codes */
+
+struct smc_clc_msg_decline_v2 { /* clc decline message */
+ struct smc_clc_msg_hdr hdr;
+ u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */
+ __be32 peer_diagnosis; /* diagnosis information */
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 os_type : 4,
+ reserved : 4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 reserved : 4,
+ os_type : 4;
+#endif
+ u8 reserved2[3];
+ __be32 peer_diagnosis_v2[SMC_DECL_DIAG_COUNT_V2];
+ struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
+} __aligned(4);
+
/* determine start of the prefix area within the proposal message */
static inline struct smc_clc_msg_proposal_prefix *
smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
@@ -282,6 +317,17 @@ static inline bool smcd_indicated(int smc_type)
return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B;
}
+static inline u8 smc_indicated_type(int is_smcd, int is_smcr)
+{
+ if (is_smcd && is_smcr)
+ return SMC_TYPE_B;
+ if (is_smcd)
+ return SMC_TYPE_D;
+ if (is_smcr)
+ return SMC_TYPE_R;
+ return SMC_TYPE_N;
+}
+
/* get SMC-D info from proposal message */
static inline struct smc_clc_msg_smcd *
smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
@@ -334,7 +380,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
- u8 version, u8 *eid);
+ u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
u8 version, u8 *negotiated_eid);
void smc_clc_init(void) __init;
@@ -343,6 +389,7 @@ void smc_clc_get_hostname(u8 **host);
bool smc_clc_match_eid(u8 *negotiated_eid,
struct smc_clc_v2_extension *smc_v2_ext,
u8 *peer_eid, u8 *local_eid);
+int smc_clc_ueid_count(void);
int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb);
int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info);
int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 4d463701a759..8e642f8f334f 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -244,6 +244,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
goto errattr;
if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
goto errattr;
+ if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
+ goto errattr;
smc_clc_get_hostname(&host);
if (host) {
memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
@@ -271,12 +273,65 @@ errmsg:
return skb->len;
}
+/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
+static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct nlattr *v2_attrs)
+{
+ char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
+ char smc_eid[SMC_MAX_EID_LEN + 1];
+
+ if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
+ goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
+ goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
+ goto errv2attr;
+ memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
+ smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
+ if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
+ goto errv2attr;
+ memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
+ smc_eid[SMC_MAX_EID_LEN] = 0;
+ if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
+ goto errv2attr;
+
+ nla_nest_end(skb, v2_attrs);
+ return 0;
+
+errv2attr:
+ nla_nest_cancel(skb, v2_attrs);
+ return -EMSGSIZE;
+}
+
+static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
+ struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *v2_attrs;
+
+ v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
+ if (!v2_attrs)
+ goto errattr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
+ goto errv2attr;
+
+ nla_nest_end(skb, v2_attrs);
+ return 0;
+
+errv2attr:
+ nla_nest_cancel(skb, v2_attrs);
+errattr:
+ return -EMSGSIZE;
+}
+
static int smc_nl_fill_lgr(struct smc_link_group *lgr,
struct sk_buff *skb,
struct netlink_callback *cb)
{
char smc_target[SMC_MAX_PNETID_LEN + 1];
- struct nlattr *attrs;
+ struct nlattr *attrs, *v2_attrs;
attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
if (!attrs)
@@ -296,6 +351,15 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr,
smc_target[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
goto errattr;
+ if (lgr->smc_version > SMC_V1) {
+ v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
+ if (!v2_attrs)
+ goto errattr;
+ if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
+ goto errattr;
+ if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
+ goto errattr;
+ }
nla_nest_end(skb, attrs);
return 0;
@@ -428,10 +492,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
struct sk_buff *skb,
struct netlink_callback *cb)
{
- char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
char smc_pnet[SMC_MAX_PNETID_LEN + 1];
- char smc_eid[SMC_MAX_EID_LEN + 1];
- struct nlattr *v2_attrs;
struct nlattr *attrs;
void *nlh;
@@ -463,32 +524,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
smc_pnet[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
goto errattr;
+ if (lgr->smc_version > SMC_V1) {
+ struct nlattr *v2_attrs;
- v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_V2);
- if (!v2_attrs)
- goto errattr;
- if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
- goto errv2attr;
- if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
- goto errv2attr;
- if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
- goto errv2attr;
- memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
- smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
- if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
- goto errv2attr;
- memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
- smc_eid[SMC_MAX_EID_LEN] = 0;
- if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
- goto errv2attr;
-
- nla_nest_end(skb, v2_attrs);
+ v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
+ if (!v2_attrs)
+ goto errattr;
+ if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
+ goto errattr;
+ }
nla_nest_end(skb, attrs);
genlmsg_end(skb, nlh);
return 0;
-errv2attr:
- nla_nest_cancel(skb, v2_attrs);
errattr:
nla_nest_cancel(skb, attrs);
errout:
@@ -684,24 +732,30 @@ static void smcr_copy_dev_info_to_link(struct smc_link *link)
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini)
{
+ struct smc_ib_device *smcibdev;
u8 rndvec[3];
int rc;
- get_device(&ini->ib_dev->ibdev->dev);
- atomic_inc(&ini->ib_dev->lnk_cnt);
+ if (lgr->smc_version == SMC_V2) {
+ lnk->smcibdev = ini->smcrv2.ib_dev_v2;
+ lnk->ibport = ini->smcrv2.ib_port_v2;
+ } else {
+ lnk->smcibdev = ini->ib_dev;
+ lnk->ibport = ini->ib_port;
+ }
+ get_device(&lnk->smcibdev->ibdev->dev);
+ atomic_inc(&lnk->smcibdev->lnk_cnt);
+ lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
lnk->link_id = smcr_next_link_id(lgr);
lnk->lgr = lgr;
lnk->link_idx = link_idx;
- lnk->smcibdev = ini->ib_dev;
- lnk->ibport = ini->ib_port;
smc_ibdev_cnt_inc(lnk);
smcr_copy_dev_info_to_link(lnk);
- lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
atomic_set(&lnk->conn_cnt, 0);
smc_llc_link_set_uid(lnk);
INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
- if (!ini->ib_dev->initialized) {
- rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
+ if (!lnk->smcibdev->initialized) {
+ rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
if (rc)
goto out;
}
@@ -709,7 +763,9 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
(rndvec[2] << 16);
rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
- ini->vlan_id, lnk->gid, &lnk->sgid_index);
+ ini->vlan_id, lnk->gid, &lnk->sgid_index,
+ lgr->smc_version == SMC_V2 ?
+ &ini->smcrv2 : NULL);
if (rc)
goto out;
rc = smc_llc_link_init(lnk);
@@ -740,11 +796,12 @@ clear_llc_lnk:
smc_llc_link_clear(lnk, false);
out:
smc_ibdev_cnt_dec(lnk);
- put_device(&ini->ib_dev->ibdev->dev);
+ put_device(&lnk->smcibdev->ibdev->dev);
+ smcibdev = lnk->smcibdev;
memset(lnk, 0, sizeof(struct smc_link));
lnk->state = SMC_LNK_UNUSED;
- if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
- wake_up(&ini->ib_dev->lnks_deleted);
+ if (!atomic_dec_return(&smcibdev->lnk_cnt))
+ wake_up(&smcibdev->lnks_deleted);
return rc;
}
@@ -808,18 +865,37 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
} else {
/* SMC-R specific settings */
+ struct smc_ib_device *ibdev;
+ int ibport;
+
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
- memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
+ lgr->smc_version = ini->smcr_version;
+ memcpy(lgr->peer_systemid, ini->peer_systemid,
SMC_SYSTEMID_LEN);
- memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
+ if (lgr->smc_version == SMC_V2) {
+ ibdev = ini->smcrv2.ib_dev_v2;
+ ibport = ini->smcrv2.ib_port_v2;
+ lgr->saddr = ini->smcrv2.saddr;
+ lgr->uses_gateway = ini->smcrv2.uses_gateway;
+ memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
+ ETH_ALEN);
+ } else {
+ ibdev = ini->ib_dev;
+ ibport = ini->ib_port;
+ }
+ memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
SMC_MAX_PNETID_LEN);
+ if (smc_wr_alloc_lgr_mem(lgr))
+ goto free_wq;
smc_llc_lgr_init(lgr, smc);
link_idx = SMC_SINGLE_LINK;
lnk = &lgr->lnk[link_idx];
rc = smcr_link_init(lgr, lnk, link_idx, ini);
- if (rc)
+ if (rc) {
+ smc_wr_free_lgr_mem(lgr);
goto free_wq;
+ }
lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock;
atomic_inc(&lgr_cnt);
@@ -1226,6 +1302,7 @@ static void smc_lgr_free(struct smc_link_group *lgr)
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
} else {
+ smc_wr_free_lgr_mem(lgr);
if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted);
}
@@ -1636,13 +1713,15 @@ out:
return rc;
}
-static bool smcr_lgr_match(struct smc_link_group *lgr,
- struct smc_clc_msg_local *lcl,
+static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
+ u8 peer_systemid[],
+ u8 peer_gid[],
+ u8 peer_mac_v1[],
enum smc_lgr_role role, u32 clcqpn)
{
int i;
- if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
+ if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
lgr->role != role)
return false;
@@ -1650,8 +1729,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
if (!smc_link_active(&lgr->lnk[i]))
continue;
if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
- !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
- !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
+ !memcmp(lgr->lnk[i].peer_gid, peer_gid, SMC_GID_SIZE) &&
+ (smcr_version == SMC_V2 ||
+ !memcmp(lgr->lnk[i].peer_mac, peer_mac_v1, ETH_ALEN)))
return true;
}
return false;
@@ -1690,7 +1770,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
if ((ini->is_smcd ?
smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
ini->ism_peer_gid[ini->ism_selected]) :
- smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
+ smcr_lgr_match(lgr, ini->smcr_version,
+ ini->peer_systemid,
+ ini->peer_gid, ini->peer_mac, role,
+ ini->ib_clcqpn)) &&
!lgr->sync_err &&
(ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) &&
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 83d30b06016f..59cef3b830d8 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -42,11 +42,16 @@ enum smc_link_state { /* possible states of a link */
};
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
+#define SMC_WR_BUF_V2_SIZE 8192 /* size of v2 work request buffer */
struct smc_wr_buf {
u8 raw[SMC_WR_BUF_SIZE];
};
+struct smc_wr_v2_buf {
+ u8 raw[SMC_WR_BUF_V2_SIZE];
+};
+
#define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */
enum smc_wr_reg_state {
@@ -92,7 +97,11 @@ struct smc_link {
struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */
struct completion *wr_tx_compl; /* WR send CQE completion */
/* above four vectors have wr_tx_cnt elements and use the same index */
+ struct ib_send_wr *wr_tx_v2_ib; /* WR send v2 meta data */
+ struct ib_sge *wr_tx_v2_sge; /* WR send v2 gather meta data*/
+ struct smc_wr_tx_pend *wr_tx_v2_pend; /* WR send v2 waiting for CQE */
dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */
+ dma_addr_t wr_tx_v2_dma_addr; /* DMA address of v2 tx buf*/
atomic_long_t wr_tx_id; /* seq # of last sent WR */
unsigned long *wr_tx_mask; /* bit mask of used indexes */
u32 wr_tx_cnt; /* number of WR send buffers */
@@ -104,6 +113,7 @@ struct smc_link {
struct ib_sge *wr_rx_sges; /* WR recv scatter meta data */
/* above three vectors have wr_rx_cnt elements and use the same index */
dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */
+ dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/
u64 wr_rx_id; /* seq # of last recv WR */
u32 wr_rx_cnt; /* number of WR recv buffers */
unsigned long wr_rx_tstamp; /* jiffies when last buf rx */
@@ -208,6 +218,7 @@ enum smc_llc_flowtype {
SMC_LLC_FLOW_NONE = 0,
SMC_LLC_FLOW_ADD_LINK = 2,
SMC_LLC_FLOW_DEL_LINK = 4,
+ SMC_LLC_FLOW_REQ_ADD_LINK = 5,
SMC_LLC_FLOW_RKEY = 6,
};
@@ -250,6 +261,10 @@ struct smc_link_group {
/* client or server */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX];
/* smc link */
+ struct smc_wr_v2_buf *wr_rx_buf_v2;
+ /* WR v2 recv payload buffer */
+ struct smc_wr_v2_buf *wr_tx_buf_v2;
+ /* WR v2 send payload buffer */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX]
@@ -288,6 +303,9 @@ struct smc_link_group {
/* link keep alive time */
u32 llc_termination_rsn;
/* rsn code for termination */
+ u8 nexthop_mac[ETH_ALEN];
+ u8 uses_gateway;
+ __be32 saddr;
};
struct { /* SMC-D */
u64 peer_gid;
@@ -302,6 +320,31 @@ struct smc_link_group {
struct smc_clc_msg_local;
+#define GID_LIST_SIZE 2
+
+struct smc_gidlist {
+ u8 len;
+ u8 list[GID_LIST_SIZE][SMC_GID_SIZE];
+};
+
+struct smc_init_info_smcrv2 {
+ /* Input fields */
+ __be32 saddr;
+ struct sock *clc_sk;
+ __be32 daddr;
+
+ /* Output fields when saddr is set */
+ struct smc_ib_device *ib_dev_v2;
+ u8 ib_port_v2;
+ u8 ib_gid_v2[SMC_GID_SIZE];
+
+ /* Additional output fields when clc_sk and daddr are set as well */
+ u8 uses_gateway;
+ u8 nexthop_mac[ETH_ALEN];
+
+ struct smc_gidlist gidlist;
+};
+
struct smc_init_info {
u8 is_smcd;
u8 smc_type_v1;
@@ -312,11 +355,16 @@ struct smc_init_info {
u32 rc;
u8 negotiated_eid[SMC_MAX_EID_LEN];
/* SMC-R */
- struct smc_clc_msg_local *ib_lcl;
+ u8 smcr_version;
+ u8 check_smcrv2;
+ u8 peer_gid[SMC_GID_SIZE];
+ u8 peer_mac[ETH_ALEN];
+ u8 peer_systemid[SMC_SYSTEMID_LEN];
struct smc_ib_device *ib_dev;
u8 ib_gid[SMC_GID_SIZE];
u8 ib_port;
u32 ib_clcqpn;
+ struct smc_init_info_smcrv2 smcrv2;
/* SMC-D */
u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1];
struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1];
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index a8845343d183..d93055ec17ae 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -17,6 +17,7 @@
#include <linux/scatterlist.h>
#include <linux/wait.h>
#include <linux/mutex.h>
+#include <linux/inetdevice.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -62,16 +63,23 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
struct ib_qp_attr qp_attr;
+ u8 hop_lim = 1;
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IB_QPS_RTR;
qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
- rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0);
+ if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
+ hop_lim = IPV6_DEFAULT_HOPLIMIT;
+ rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, hop_lim, 0);
rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
- memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
- sizeof(lnk->peer_mac));
+ if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
+ memcpy(&qp_attr.ah_attr.roce.dmac, lnk->lgr->nexthop_mac,
+ sizeof(lnk->lgr->nexthop_mac));
+ else
+ memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
+ sizeof(lnk->peer_mac));
qp_attr.dest_qp_num = lnk->peer_qpn;
qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */
qp_attr.max_dest_rd_atomic = 1; /* max # of resources for incoming
@@ -183,9 +191,81 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
}
+int smc_ib_find_route(__be32 saddr, __be32 daddr,
+ u8 nexthop_mac[], u8 *uses_gateway)
+{
+ struct neighbour *neigh = NULL;
+ struct rtable *rt = NULL;
+ struct flowi4 fl4 = {
+ .saddr = saddr,
+ .daddr = daddr
+ };
+
+ if (daddr == cpu_to_be32(INADDR_NONE))
+ goto out;
+ rt = ip_route_output_flow(&init_net, &fl4, NULL);
+ if (IS_ERR(rt))
+ goto out;
+ if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
+ goto out;
+ neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr);
+ if (neigh) {
+ memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
+ *uses_gateway = rt->rt_uses_gateway;
+ return 0;
+ }
+out:
+ return -ENOENT;
+}
+
+static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
+ const struct ib_gid_attr *attr,
+ u8 gid[], u8 *sgid_index,
+ struct smc_init_info_smcrv2 *smcrv2)
+{
+ if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) {
+ if (gid)
+ memcpy(gid, &attr->gid, SMC_GID_SIZE);
+ if (sgid_index)
+ *sgid_index = attr->index;
+ return 0;
+ }
+ if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
+ smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
+ struct in_device *in_dev = __in_dev_get_rcu(ndev);
+ const struct in_ifaddr *ifa;
+ bool subnet_match = false;
+
+ if (!in_dev)
+ goto out;
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ if (!inet_ifa_match(smcrv2->saddr, ifa))
+ continue;
+ subnet_match = true;
+ break;
+ }
+ if (!subnet_match)
+ goto out;
+ if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
+ smcrv2->daddr,
+ smcrv2->nexthop_mac,
+ &smcrv2->uses_gateway))
+ goto out;
+
+ if (gid)
+ memcpy(gid, &attr->gid, SMC_GID_SIZE);
+ if (sgid_index)
+ *sgid_index = attr->index;
+ return 0;
+ }
+out:
+ return -ENODEV;
+}
+
/* determine the gid for an ib-device port and vlan id */
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
- unsigned short vlan_id, u8 gid[], u8 *sgid_index)
+ unsigned short vlan_id, u8 gid[], u8 *sgid_index,
+ struct smc_init_info_smcrv2 *smcrv2)
{
const struct ib_gid_attr *attr;
const struct net_device *ndev;
@@ -201,15 +281,13 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
if (!IS_ERR(ndev) &&
((!vlan_id && !is_vlan_dev(ndev)) ||
(vlan_id && is_vlan_dev(ndev) &&
- vlan_dev_vlan_id(ndev) == vlan_id)) &&
- attr->gid_type == IB_GID_TYPE_ROCE) {
- rcu_read_unlock();
- if (gid)
- memcpy(gid, &attr->gid, SMC_GID_SIZE);
- if (sgid_index)
- *sgid_index = attr->index;
- rdma_put_gid_attr(attr);
- return 0;
+ vlan_dev_vlan_id(ndev) == vlan_id))) {
+ if (!smc_ib_determine_gid_rcu(ndev, attr, gid,
+ sgid_index, smcrv2)) {
+ rcu_read_unlock();
+ rdma_put_gid_attr(attr);
+ return 0;
+ }
}
rcu_read_unlock();
rdma_put_gid_attr(attr);
@@ -217,6 +295,58 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
return -ENODEV;
}
+/* check if gid is still defined on smcibdev */
+static bool smc_ib_check_link_gid(u8 gid[SMC_GID_SIZE], bool smcrv2,
+ struct smc_ib_device *smcibdev, u8 ibport)
+{
+ const struct ib_gid_attr *attr;
+ bool rc = false;
+ int i;
+
+ for (i = 0; !rc && i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
+ attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i);
+ if (IS_ERR(attr))
+ continue;
+
+ rcu_read_lock();
+ if ((!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) ||
+ (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
+ !(ipv6_addr_type((const struct in6_addr *)&attr->gid)
+ & IPV6_ADDR_LINKLOCAL)))
+ if (!memcmp(gid, &attr->gid, SMC_GID_SIZE))
+ rc = true;
+ rcu_read_unlock();
+ rdma_put_gid_attr(attr);
+ }
+ return rc;
+}
+
+/* check all links whether the gid is still defined on smcibdev */
+static void smc_ib_gid_check(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link_group *lgr;
+ int i;
+
+ spin_lock_bh(&smc_lgr_list.lock);
+ list_for_each_entry(lgr, &smc_lgr_list.list, list) {
+ if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+ SMC_MAX_PNETID_LEN))
+ continue; /* lgr is not affected */
+ if (list_empty(&lgr->list))
+ continue;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state == SMC_LNK_UNUSED ||
+ lgr->lnk[i].smcibdev != smcibdev)
+ continue;
+ if (!smc_ib_check_link_gid(lgr->lnk[i].gid,
+ lgr->smc_version == SMC_V2,
+ smcibdev, ibport))
+ smcr_port_err(smcibdev, ibport);
+ }
+ }
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
{
int rc;
@@ -255,6 +385,7 @@ static void smc_ib_port_event_work(struct work_struct *work)
} else {
clear_bit(port_idx, smcibdev->ports_going_away);
smcr_port_add(smcibdev, port_idx + 1);
+ smc_ib_gid_check(smcibdev, port_idx + 1);
}
}
}
@@ -523,6 +654,7 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk)
/* create a queue pair within the protection domain for a link */
int smc_ib_create_queue_pair(struct smc_link *lnk)
{
+ int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
struct ib_qp_init_attr qp_attr = {
.event_handler = smc_ib_qp_event_handler,
.qp_context = lnk,
@@ -536,7 +668,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
.max_send_wr = SMC_WR_BUF_CNT * 3,
.max_recv_wr = SMC_WR_BUF_CNT * 3,
.max_send_sge = SMC_IB_MAX_SEND_SGE,
- .max_recv_sge = 1,
+ .max_recv_sge = sges_per_buf,
},
.sq_sig_type = IB_SIGNAL_REQ_WR,
.qp_type = IB_QPT_RC,
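smc_ib_determine_gid_rcu() accepts a RoCE v2 (ROCE_UDP_ENCAP) GID only when the IPv4 source address used for the CLC handshake lies in the subnet of one of the netdev's addresses, and, for routed peers, only when a next-hop MAC can be resolved. A minimal userspace sketch of the subnet test follows; the kernel uses inet_ifa_match(), and the addresses here are purely illustrative.

#include <arpa/inet.h>
#include <stdbool.h>
#include <stdio.h>

/* same predicate as the kernel's inet_ifa_match(): saddr and the interface
 * address agree in all bits covered by the netmask
 */
static bool subnet_match(in_addr_t saddr, in_addr_t ifa_addr, in_addr_t mask)
{
	return ((saddr ^ ifa_addr) & mask) == 0;
}

int main(void)
{
	in_addr_t saddr = inet_addr("192.0.2.17");	/* CLC source address */
	in_addr_t ifa   = inet_addr("192.0.2.1");	/* address on the netdev */
	in_addr_t mask  = inet_addr("255.255.255.0");

	printf("match: %d\n", subnet_match(saddr, ifa, mask)); /* prints 1 */
	return 0;
}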
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 3085f5180da7..07585937370e 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -59,6 +59,17 @@ struct smc_ib_device { /* ib-device infos for smc */
int ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */
};
+static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
+{
+ struct in6_addr *addr6 = (struct in6_addr *)gid;
+
+ if (ipv6_addr_v4mapped(addr6) ||
+ !(addr6->s6_addr32[0] | addr6->s6_addr32[1] | addr6->s6_addr32[2]))
+ return addr6->s6_addr32[3];
+ return cpu_to_be32(INADDR_NONE);
+}
+
+struct smc_init_info_smcrv2;
struct smc_buf_desc;
struct smc_link;
@@ -90,7 +101,10 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
- unsigned short vlan_id, u8 gid[], u8 *sgid_index);
+ unsigned short vlan_id, u8 gid[], u8 *sgid_index,
+ struct smc_init_info_smcrv2 *smcrv2);
+int smc_ib_find_route(__be32 saddr, __be32 daddr,
+ u8 nexthop_mac[], u8 *uses_gateway);
bool smc_ib_is_valid_local_systemid(void);
int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
#endif
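smc_ib_gid_to_ipv4() extracts the IPv4 address that a RoCE v2 GID embeds as an IPv4-mapped IPv6 address. A small plain-C sketch of the same mapping, not kernel code, assuming the GID is either v4-mapped (::ffff:a.b.c.d) or has an all-zero 96-bit prefix:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

/* v4-mapped GID or a GID whose first 12 bytes are zero yields the IPv4
 * address stored in its last 4 bytes; anything else yields INADDR_NONE
 */
static in_addr_t gid_to_ipv4(const struct in6_addr *gid)
{
	static const unsigned char zero[12];
	in_addr_t v4;

	if (IN6_IS_ADDR_V4MAPPED(gid) ||
	    !memcmp(gid->s6_addr, zero, sizeof(zero))) {
		memcpy(&v4, &gid->s6_addr[12], sizeof(v4));
		return v4;
	}
	return INADDR_NONE;
}

int main(void)
{
	struct in6_addr gid;
	struct in_addr out;
	char buf[INET_ADDRSTRLEN];

	inet_pton(AF_INET6, "::ffff:192.0.2.1", &gid);	/* RoCE v2 GID form */
	out.s_addr = gid_to_ipv4(&gid);
	printf("%s\n", inet_ntop(AF_INET, &out, buf, sizeof(buf))); /* 192.0.2.1 */
	return 0;
}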
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 72f4b72eb175..a9623c952007 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -23,16 +23,24 @@
struct smc_llc_hdr {
struct smc_wr_rx_hdr common;
- u8 length; /* 44 */
-#if defined(__BIG_ENDIAN_BITFIELD)
- u8 reserved:4,
- add_link_rej_rsn:4;
+ union {
+ struct {
+ u8 length; /* 44 */
+ #if defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:4,
+ add_link_rej_rsn:4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u8 add_link_rej_rsn:4,
- reserved:4;
+ u8 add_link_rej_rsn:4,
+ reserved:4;
#endif
+ };
+ u16 length_v2; /* 44 - 8192 */
+ };
u8 flags;
-};
+} __packed; /* format defined in
+ * IBM Shared Memory Communications Version 2
+ * (https://www.ibm.com/support/pages/node/6326337)
+ */
#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
@@ -76,6 +84,32 @@ struct smc_llc_msg_add_link_cont_rt {
__be64 rmb_vaddr_new;
};
+struct smc_llc_msg_add_link_v2_ext {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 v2_direct : 1,
+ reserved : 7;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 reserved : 7,
+ v2_direct : 1;
+#endif
+ u8 reserved2;
+ u8 client_target_gid[SMC_GID_SIZE];
+ u8 reserved3[8];
+ u16 num_rkeys;
+ struct smc_llc_msg_add_link_cont_rt rt[];
+} __packed; /* format defined in
+ * IBM Shared Memory Communications Version 2
+ * (https://www.ibm.com/support/pages/node/6326337)
+ */
+
+struct smc_llc_msg_req_add_link_v2 {
+ struct smc_llc_hdr hd;
+ u8 reserved[20];
+ u8 gid_cnt;
+ u8 reserved2[3];
+ u8 gid[][SMC_GID_SIZE];
+};
+
#define SMC_LLC_RKEYS_PER_CONT_MSG 2
struct smc_llc_msg_add_link_cont { /* type 0x03 */
@@ -114,7 +148,8 @@ struct smc_rmb_rtoken {
__be64 rmb_vaddr;
} __packed; /* format defined in RFC7609 */
-#define SMC_LLC_RKEYS_PER_MSG 3
+#define SMC_LLC_RKEYS_PER_MSG 3
+#define SMC_LLC_RKEYS_PER_MSG_V2 255
struct smc_llc_msg_confirm_rkey { /* type 0x06 */
struct smc_llc_hdr hd;
@@ -135,9 +170,18 @@ struct smc_llc_msg_delete_rkey { /* type 0x09 */
u8 reserved2[4];
};
+struct smc_llc_msg_delete_rkey_v2 { /* type 0x29 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ u8 num_inval_rkeys;
+ u8 reserved[2];
+ __be32 rkey[];
+};
+
union smc_llc_msg {
struct smc_llc_msg_confirm_link confirm_link;
struct smc_llc_msg_add_link add_link;
+ struct smc_llc_msg_req_add_link_v2 req_add_link;
struct smc_llc_msg_add_link_cont add_link_cont;
struct smc_llc_msg_del_link delete_link;
@@ -189,7 +233,7 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type,
struct smc_llc_qentry *qentry)
{
- u8 msg_type = qentry->msg.raw.hdr.common.type;
+ u8 msg_type = qentry->msg.raw.hdr.common.llc_type;
if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) &&
flow_type != msg_type && !lgr->delayed_event) {
@@ -219,7 +263,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow,
spin_unlock_bh(&lgr->llc_flow_lock);
return false;
}
- switch (qentry->msg.raw.hdr.common.type) {
+ switch (qentry->msg.raw.hdr.common.llc_type) {
case SMC_LLC_ADD_LINK:
flow->type = SMC_LLC_FLOW_ADD_LINK;
break;
@@ -306,7 +350,7 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
smc_llc_flow_qentry_del(flow);
goto out;
}
- rcv_msg = flow->qentry->msg.raw.hdr.common.type;
+ rcv_msg = flow->qentry->msg.raw.hdr.common.llc_type;
if (exp_msg && rcv_msg != exp_msg) {
if (exp_msg == SMC_LLC_ADD_LINK &&
rcv_msg == SMC_LLC_DELETE_LINK) {
@@ -374,6 +418,30 @@ static int smc_llc_add_pending_send(struct smc_link *link,
return 0;
}
+static int smc_llc_add_pending_send_v2(struct smc_link *link,
+ struct smc_wr_v2_buf **wr_buf,
+ struct smc_wr_tx_pend_priv **pend)
+{
+ int rc;
+
+ rc = smc_wr_tx_get_v2_slot(link, smc_llc_tx_handler, wr_buf, pend);
+ if (rc < 0)
+ return rc;
+ return 0;
+}
+
+static void smc_llc_init_msg_hdr(struct smc_llc_hdr *hdr,
+ struct smc_link_group *lgr, size_t len)
+{
+ if (lgr->smc_version == SMC_V2) {
+ hdr->common.llc_version = SMC_V2;
+ hdr->length_v2 = len;
+ } else {
+ hdr->common.llc_version = 0;
+ hdr->length = len;
+ }
+}
+
/* high-level API to send LLC confirm link */
int smc_llc_send_confirm_link(struct smc_link *link,
enum smc_llc_reqresp reqresp)
@@ -390,8 +458,8 @@ int smc_llc_send_confirm_link(struct smc_link *link,
goto put_out;
confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
memset(confllc, 0, sizeof(*confllc));
- confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
- confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+ confllc->hd.common.llc_type = SMC_LLC_CONFIRM_LINK;
+ smc_llc_init_msg_hdr(&confllc->hd, link->lgr, sizeof(*confllc));
confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
if (reqresp == SMC_LLC_RESP)
confllc->hd.flags |= SMC_LLC_FLAG_RESP;
@@ -426,8 +494,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
goto put_out;
rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
- rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
- rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
+ rkeyllc->hd.common.llc_type = SMC_LLC_CONFIRM_RKEY;
+ smc_llc_init_msg_hdr(&rkeyllc->hd, send_link->lgr, sizeof(*rkeyllc));
rtok_ix = 1;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
@@ -471,8 +539,8 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
goto put_out;
rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
- rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
- rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
+ rkeyllc->hd.common.llc_type = SMC_LLC_DELETE_RKEY;
+ smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc));
rkeyllc->num_rkeys = 1;
rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
/* send llc message */
@@ -482,26 +550,116 @@ put_out:
return rc;
}
+/* return first buffer from any of the next buf lists */
+static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
+ int *buf_lst)
+{
+ struct smc_buf_desc *buf_pos;
+
+ while (*buf_lst < SMC_RMBE_SIZES) {
+ buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
+ struct smc_buf_desc, list);
+ if (buf_pos)
+ return buf_pos;
+ (*buf_lst)++;
+ }
+ return NULL;
+}
+
+/* return next rmb from buffer lists */
+static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
+ int *buf_lst,
+ struct smc_buf_desc *buf_pos)
+{
+ struct smc_buf_desc *buf_next;
+
+ if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
+ (*buf_lst)++;
+ return _smc_llc_get_next_rmb(lgr, buf_lst);
+ }
+ buf_next = list_next_entry(buf_pos, list);
+ return buf_next;
+}
+
+static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
+ int *buf_lst)
+{
+ *buf_lst = 0;
+ return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
+}
+
+static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
+ struct smc_link *link, struct smc_link *link_new)
+{
+ struct smc_link_group *lgr = link->lgr;
+ struct smc_buf_desc *buf_pos;
+ int prim_lnk_idx, lnk_idx, i;
+ struct smc_buf_desc *rmb;
+ int len = sizeof(*ext);
+ int buf_lst;
+
+ ext->v2_direct = !lgr->uses_gateway;
+ memcpy(ext->client_target_gid, link_new->gid, SMC_GID_SIZE);
+
+ prim_lnk_idx = link->link_idx;
+ lnk_idx = link_new->link_idx;
+ mutex_lock(&lgr->rmbs_lock);
+ ext->num_rkeys = lgr->conns_num;
+ if (!ext->num_rkeys)
+ goto out;
+ buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
+ for (i = 0; i < ext->num_rkeys; i++) {
+ if (!buf_pos)
+ break;
+ rmb = buf_pos;
+ ext->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey);
+ ext->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey);
+ ext->rt[i].rmb_vaddr_new =
+ cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
+ buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
+ while (buf_pos && !(buf_pos)->used)
+ buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
+ }
+ len += i * sizeof(ext->rt[0]);
+out:
+ mutex_unlock(&lgr->rmbs_lock);
+ return len;
+}
+
/* send ADD LINK request or response */
int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
struct smc_link *link_new,
enum smc_llc_reqresp reqresp)
{
+ struct smc_llc_msg_add_link_v2_ext *ext = NULL;
struct smc_llc_msg_add_link *addllc;
struct smc_wr_tx_pend_priv *pend;
- struct smc_wr_buf *wr_buf;
+ int len = sizeof(*addllc);
int rc;
if (!smc_wr_tx_link_hold(link))
return -ENOLINK;
- rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
- if (rc)
- goto put_out;
- addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ if (link->lgr->smc_version == SMC_V2) {
+ struct smc_wr_v2_buf *wr_buf;
+
+ rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
+ if (rc)
+ goto put_out;
+ addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ ext = (struct smc_llc_msg_add_link_v2_ext *)
+ &wr_buf->raw[sizeof(*addllc)];
+ memset(ext, 0, SMC_WR_TX_SIZE);
+ } else {
+ struct smc_wr_buf *wr_buf;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ goto put_out;
+ addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ }
memset(addllc, 0, sizeof(*addllc));
- addllc->hd.common.type = SMC_LLC_ADD_LINK;
- addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ addllc->hd.common.llc_type = SMC_LLC_ADD_LINK;
if (reqresp == SMC_LLC_RESP)
addllc->hd.flags |= SMC_LLC_FLAG_RESP;
memcpy(addllc->sender_mac, mac, ETH_ALEN);
@@ -516,8 +674,14 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
addllc->qp_mtu = min(link_new->path_mtu,
link_new->peer_mtu);
}
+ if (ext && link_new)
+ len += smc_llc_fill_ext_v2(ext, link, link_new);
+ smc_llc_init_msg_hdr(&addllc->hd, link->lgr, len);
/* send llc message */
- rc = smc_wr_tx_send(link, pend);
+ if (link->lgr->smc_version == SMC_V2)
+ rc = smc_wr_tx_v2_send(link, pend, len);
+ else
+ rc = smc_wr_tx_send(link, pend);
put_out:
smc_wr_tx_link_put(link);
return rc;
@@ -541,8 +705,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
delllc = (struct smc_llc_msg_del_link *)wr_buf;
memset(delllc, 0, sizeof(*delllc));
- delllc->hd.common.type = SMC_LLC_DELETE_LINK;
- delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
+ delllc->hd.common.llc_type = SMC_LLC_DELETE_LINK;
+ smc_llc_init_msg_hdr(&delllc->hd, link->lgr, sizeof(*delllc));
if (reqresp == SMC_LLC_RESP)
delllc->hd.flags |= SMC_LLC_FLAG_RESP;
if (orderly)
@@ -574,8 +738,8 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
goto put_out;
testllc = (struct smc_llc_msg_test_link *)wr_buf;
memset(testllc, 0, sizeof(*testllc));
- testllc->hd.common.type = SMC_LLC_TEST_LINK;
- testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+ testllc->hd.common.llc_type = SMC_LLC_TEST_LINK;
+ smc_llc_init_msg_hdr(&testllc->hd, link->lgr, sizeof(*testllc));
memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
/* send llc message */
rc = smc_wr_tx_send(link, pend);
@@ -651,44 +815,6 @@ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
return -EMLINK;
}
-/* return first buffer from any of the next buf lists */
-static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
- int *buf_lst)
-{
- struct smc_buf_desc *buf_pos;
-
- while (*buf_lst < SMC_RMBE_SIZES) {
- buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
- struct smc_buf_desc, list);
- if (buf_pos)
- return buf_pos;
- (*buf_lst)++;
- }
- return NULL;
-}
-
-/* return next rmb from buffer lists */
-static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
- int *buf_lst,
- struct smc_buf_desc *buf_pos)
-{
- struct smc_buf_desc *buf_next;
-
- if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
- (*buf_lst)++;
- return _smc_llc_get_next_rmb(lgr, buf_lst);
- }
- buf_next = list_next_entry(buf_pos, list);
- return buf_next;
-}
-
-static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
- int *buf_lst)
-{
- *buf_lst = 0;
- return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
-}
-
/* send one add_link_continue msg */
static int smc_llc_add_link_cont(struct smc_link *link,
struct smc_link *link_new, u8 *num_rkeys_todo,
@@ -734,7 +860,7 @@ static int smc_llc_add_link_cont(struct smc_link *link,
while (*buf_pos && !(*buf_pos)->used)
*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
}
- addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT;
+ addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT;
addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
if (lgr->role == SMC_CLNT)
addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
@@ -793,6 +919,8 @@ static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+ smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
+ sizeof(qentry->msg));
return smc_llc_send_message(qentry->link, &qentry->msg);
}
@@ -813,7 +941,7 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
SMC_LLC_DEL_LOST_PATH);
return -ENOLINK;
}
- if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) {
+ if (qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
/* received DELETE_LINK instead */
qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, &qentry->msg);
@@ -854,6 +982,26 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
return 0;
}
+static void smc_llc_save_add_link_rkeys(struct smc_link *link,
+ struct smc_link *link_new)
+{
+ struct smc_llc_msg_add_link_v2_ext *ext;
+ struct smc_link_group *lgr = link->lgr;
+ int max, i;
+
+ ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 +
+ SMC_WR_TX_SIZE);
+ max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
+ mutex_lock(&lgr->rmbs_lock);
+ for (i = 0; i < max; i++) {
+ smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
+ ext->rt[i].rmb_key,
+ ext->rt[i].rmb_vaddr_new,
+ ext->rt[i].rmb_key_new);
+ }
+ mutex_unlock(&lgr->rmbs_lock);
+}
+
static void smc_llc_save_add_link_info(struct smc_link *link,
struct smc_llc_msg_add_link *add_llc)
{
@@ -870,31 +1018,47 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
struct smc_llc_msg_add_link *llc = &qentry->msg.add_link;
enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
struct smc_link_group *lgr = smc_get_lgr(link);
+ struct smc_init_info *ini = NULL;
struct smc_link *lnk_new = NULL;
- struct smc_init_info ini;
int lnk_idx, rc = 0;
if (!llc->qp_mtu)
goto out_reject;
- ini.vlan_id = lgr->vlan_id;
- smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
+ ini = kzalloc(sizeof(*ini), GFP_KERNEL);
+ if (!ini) {
+ rc = -ENOMEM;
+ goto out_reject;
+ }
+
+ ini->vlan_id = lgr->vlan_id;
+ if (lgr->smc_version == SMC_V2) {
+ ini->check_smcrv2 = true;
+ ini->smcrv2.saddr = lgr->saddr;
+ ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid);
+ }
+ smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
- !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) {
- if (!ini.ib_dev)
+ (lgr->smc_version == SMC_V2 ||
+ !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN))) {
+ if (!ini->ib_dev && !ini->smcrv2.ib_dev_v2)
goto out_reject;
lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
}
- if (!ini.ib_dev) {
+ if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
- ini.ib_dev = link->smcibdev;
- ini.ib_port = link->ibport;
+ ini->smcrv2.ib_dev_v2 = link->smcibdev;
+ ini->smcrv2.ib_port_v2 = link->ibport;
+ } else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
+ lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
+ ini->ib_dev = link->smcibdev;
+ ini->ib_port = link->ibport;
}
lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
if (lnk_idx < 0)
goto out_reject;
lnk_new = &lgr->lnk[lnk_idx];
- rc = smcr_link_init(lgr, lnk_new, lnk_idx, &ini);
+ rc = smcr_link_init(lgr, lnk_new, lnk_idx, ini);
if (rc)
goto out_reject;
smc_llc_save_add_link_info(lnk_new, llc);
@@ -910,16 +1074,20 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
goto out_clear_lnk;
rc = smc_llc_send_add_link(link,
- lnk_new->smcibdev->mac[ini.ib_port - 1],
+ lnk_new->smcibdev->mac[lnk_new->ibport - 1],
lnk_new->gid, lnk_new, SMC_LLC_RESP);
if (rc)
goto out_clear_lnk;
- rc = smc_llc_cli_rkey_exchange(link, lnk_new);
- if (rc) {
- rc = 0;
- goto out_clear_lnk;
+ if (lgr->smc_version == SMC_V2) {
+ smc_llc_save_add_link_rkeys(link, lnk_new);
+ } else {
+ rc = smc_llc_cli_rkey_exchange(link, lnk_new);
+ if (rc) {
+ rc = 0;
+ goto out_clear_lnk;
+ }
}
- rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t);
+ rc = smc_llc_cli_conf_link(link, ini, lnk_new, lgr_new_t);
if (!rc)
goto out;
out_clear_lnk:
@@ -928,29 +1096,78 @@ out_clear_lnk:
out_reject:
smc_llc_cli_add_link_reject(qentry);
out:
+ kfree(ini);
kfree(qentry);
return rc;
}
+static void smc_llc_send_request_add_link(struct smc_link *link)
+{
+ struct smc_llc_msg_req_add_link_v2 *llc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_v2_buf *wr_buf;
+ struct smc_gidlist gidlist;
+ int rc, len, i;
+
+ if (!smc_wr_tx_link_hold(link))
+ return;
+ if (link->lgr->type == SMC_LGR_SYMMETRIC ||
+ link->lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+ goto put_out;
+
+ smc_fill_gid_list(link->lgr, &gidlist, link->smcibdev, link->gid);
+ if (gidlist.len <= 1)
+ goto put_out;
+
+ rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
+ if (rc)
+ goto put_out;
+ llc = (struct smc_llc_msg_req_add_link_v2 *)wr_buf;
+ memset(llc, 0, SMC_WR_TX_SIZE);
+
+ llc->hd.common.llc_type = SMC_LLC_REQ_ADD_LINK;
+ for (i = 0; i < gidlist.len; i++)
+ memcpy(llc->gid[i], gidlist.list[i], sizeof(gidlist.list[0]));
+ llc->gid_cnt = gidlist.len;
+ len = sizeof(*llc) + (gidlist.len * sizeof(gidlist.list[0]));
+ smc_llc_init_msg_hdr(&llc->hd, link->lgr, len);
+ rc = smc_wr_tx_v2_send(link, pend, len);
+ if (!rc)
+ /* set REQ_ADD_LINK flow and wait for response from peer */
+ link->lgr->llc_flow_lcl.type = SMC_LLC_FLOW_REQ_ADD_LINK;
+put_out:
+ smc_wr_tx_link_put(link);
+}
+
/* as an SMC client, invite server to start the add_link processing */
static void smc_llc_cli_add_link_invite(struct smc_link *link,
struct smc_llc_qentry *qentry)
{
struct smc_link_group *lgr = smc_get_lgr(link);
- struct smc_init_info ini;
+ struct smc_init_info *ini = NULL;
+
+ if (lgr->smc_version == SMC_V2) {
+ smc_llc_send_request_add_link(link);
+ goto out;
+ }
if (lgr->type == SMC_LGR_SYMMETRIC ||
lgr->type == SMC_LGR_ASYMMETRIC_PEER)
goto out;
- ini.vlan_id = lgr->vlan_id;
- smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
- if (!ini.ib_dev)
+ ini = kzalloc(sizeof(*ini), GFP_KERNEL);
+ if (!ini)
+ goto out;
+
+ ini->vlan_id = lgr->vlan_id;
+ smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
+ if (!ini->ib_dev)
goto out;
- smc_llc_send_add_link(link, ini.ib_dev->mac[ini.ib_port - 1],
- ini.ib_gid, NULL, SMC_LLC_REQ);
+ smc_llc_send_add_link(link, ini->ib_dev->mac[ini->ib_port - 1],
+ ini->ib_gid, NULL, SMC_LLC_REQ);
out:
+ kfree(ini);
kfree(qentry);
}
@@ -966,7 +1183,7 @@ static bool smc_llc_is_empty_llc_message(union smc_llc_msg *llc)
static bool smc_llc_is_local_add_link(union smc_llc_msg *llc)
{
- if (llc->raw.hdr.common.type == SMC_LLC_ADD_LINK &&
+ if (llc->raw.hdr.common.llc_type == SMC_LLC_ADD_LINK &&
smc_llc_is_empty_llc_message(llc))
return true;
return false;
@@ -1133,7 +1350,7 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
/* receive CONFIRM LINK response over the RoCE fabric */
qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0);
if (!qentry ||
- qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) {
+ qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
/* send DELETE LINK */
smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
false, SMC_LLC_DEL_LOST_PATH);
@@ -1152,37 +1369,80 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
return 0;
}
-int smc_llc_srv_add_link(struct smc_link *link)
+static void smc_llc_send_req_add_link_response(struct smc_llc_qentry *qentry)
+{
+ qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
+ sizeof(qentry->msg));
+ memset(&qentry->msg.raw.data, 0, sizeof(qentry->msg.raw.data));
+ smc_llc_send_message(qentry->link, &qentry->msg);
+}
+
+int smc_llc_srv_add_link(struct smc_link *link,
+ struct smc_llc_qentry *req_qentry)
{
enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
struct smc_link_group *lgr = link->lgr;
struct smc_llc_msg_add_link *add_llc;
struct smc_llc_qentry *qentry = NULL;
- struct smc_link *link_new;
- struct smc_init_info ini;
+ bool send_req_add_link_resp = false;
+ struct smc_link *link_new = NULL;
+ struct smc_init_info *ini = NULL;
int lnk_idx, rc = 0;
+ if (req_qentry &&
+ req_qentry->msg.raw.hdr.common.llc_type == SMC_LLC_REQ_ADD_LINK)
+ send_req_add_link_resp = true;
+
+ ini = kzalloc(sizeof(*ini), GFP_KERNEL);
+ if (!ini) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
/* ignore client add link recommendation, start new flow */
- ini.vlan_id = lgr->vlan_id;
- smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
- if (!ini.ib_dev) {
+ ini->vlan_id = lgr->vlan_id;
+ if (lgr->smc_version == SMC_V2) {
+ ini->check_smcrv2 = true;
+ ini->smcrv2.saddr = lgr->saddr;
+ if (send_req_add_link_resp) {
+ struct smc_llc_msg_req_add_link_v2 *req_add =
+ &req_qentry->msg.req_add_link;
+
+ ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]);
+ }
+ }
+ smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
+ if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
+ lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
+ ini->smcrv2.ib_dev_v2 = link->smcibdev;
+ ini->smcrv2.ib_port_v2 = link->ibport;
+ } else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
- ini.ib_dev = link->smcibdev;
- ini.ib_port = link->ibport;
+ ini->ib_dev = link->smcibdev;
+ ini->ib_port = link->ibport;
}
lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
- if (lnk_idx < 0)
- return 0;
+ if (lnk_idx < 0) {
+ rc = 0;
+ goto out;
+ }
- rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, &ini);
+ rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, ini);
if (rc)
- return rc;
+ goto out;
link_new = &lgr->lnk[lnk_idx];
+
+ rc = smcr_buf_map_lgr(link_new);
+ if (rc)
+ goto out_err;
+
rc = smc_llc_send_add_link(link,
- link_new->smcibdev->mac[ini.ib_port - 1],
+ link_new->smcibdev->mac[link_new->ibport-1],
link_new->gid, link_new, SMC_LLC_REQ);
if (rc)
goto out_err;
+ send_req_add_link_resp = false;
/* receive ADD LINK response over the RoCE fabric */
qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK);
if (!qentry) {
@@ -1197,7 +1457,8 @@ int smc_llc_srv_add_link(struct smc_link *link)
}
if (lgr->type == SMC_LGR_SINGLE &&
(!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
- !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN))) {
+ (lgr->smc_version == SMC_V2 ||
+ !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN)))) {
lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
}
smc_llc_save_add_link_info(link_new, add_llc);
@@ -1206,39 +1467,49 @@ int smc_llc_srv_add_link(struct smc_link *link)
rc = smc_ib_ready_link(link_new);
if (rc)
goto out_err;
- rc = smcr_buf_map_lgr(link_new);
- if (rc)
- goto out_err;
rc = smcr_buf_reg_lgr(link_new);
if (rc)
goto out_err;
- rc = smc_llc_srv_rkey_exchange(link, link_new);
- if (rc)
- goto out_err;
+ if (lgr->smc_version == SMC_V2) {
+ smc_llc_save_add_link_rkeys(link, link_new);
+ } else {
+ rc = smc_llc_srv_rkey_exchange(link, link_new);
+ if (rc)
+ goto out_err;
+ }
rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t);
if (rc)
goto out_err;
+ kfree(ini);
return 0;
out_err:
- link_new->state = SMC_LNK_INACTIVE;
- smcr_link_clear(link_new, false);
+ if (link_new) {
+ link_new->state = SMC_LNK_INACTIVE;
+ smcr_link_clear(link_new, false);
+ }
+out:
+ kfree(ini);
+ if (send_req_add_link_resp)
+ smc_llc_send_req_add_link_response(req_qentry);
return rc;
}
static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
{
struct smc_link *link = lgr->llc_flow_lcl.qentry->link;
+ struct smc_llc_qentry *qentry;
int rc;
- smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
mutex_lock(&lgr->llc_conf_mutex);
- rc = smc_llc_srv_add_link(link);
+ rc = smc_llc_srv_add_link(link, qentry);
if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
/* delete any asymmetric link */
smc_llc_delete_asym_link(lgr);
}
mutex_unlock(&lgr->llc_conf_mutex);
+ kfree(qentry);
}
/* enqueue a local add_link req to trigger a new add_link flow */
@@ -1246,8 +1517,8 @@ void smc_llc_add_link_local(struct smc_link *link)
{
struct smc_llc_msg_add_link add_llc = {};
- add_llc.hd.length = sizeof(add_llc);
- add_llc.hd.common.type = SMC_LLC_ADD_LINK;
+ add_llc.hd.common.llc_type = SMC_LLC_ADD_LINK;
+ smc_llc_init_msg_hdr(&add_llc.hd, link->lgr, sizeof(add_llc));
/* no dev and port needed */
smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc);
}
@@ -1269,7 +1540,8 @@ static void smc_llc_add_link_work(struct work_struct *work)
else
smc_llc_process_srv_add_link(lgr);
out:
- smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+ if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_REQ_ADD_LINK)
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
/* enqueue a local del_link msg to trigger a new del_link flow,
@@ -1279,8 +1551,8 @@ void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
{
struct smc_llc_msg_del_link del_llc = {};
- del_llc.hd.length = sizeof(del_llc);
- del_llc.hd.common.type = SMC_LLC_DELETE_LINK;
+ del_llc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
+ smc_llc_init_msg_hdr(&del_llc.hd, link->lgr, sizeof(del_llc));
del_llc.link_num = del_link_id;
del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH);
del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
@@ -1350,8 +1622,8 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
struct smc_llc_msg_del_link delllc = {};
int i;
- delllc.hd.common.type = SMC_LLC_DELETE_LINK;
- delllc.hd.length = sizeof(delllc);
+ delllc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
+ smc_llc_init_msg_hdr(&delllc.hd, lgr, sizeof(delllc));
if (ord)
delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
@@ -1467,6 +1739,8 @@ static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
link = qentry->link;
num_entries = llc->rtoken[0].num_rkeys;
+ if (num_entries > SMC_LLC_RKEYS_PER_MSG)
+ goto out_err;
/* first rkey entry is for receiving link */
rk_idx = smc_rtoken_add(link,
llc->rtoken[0].rmb_vaddr,
@@ -1485,6 +1759,7 @@ out_err:
llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
out:
llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
smc_llc_send_message(link, &qentry->msg);
smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
}
@@ -1502,6 +1777,28 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
llc = &qentry->msg.delete_rkey;
link = qentry->link;
+ if (lgr->smc_version == SMC_V2) {
+ struct smc_llc_msg_delete_rkey_v2 *llcv2;
+
+ memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc));
+ llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2;
+ llcv2->num_inval_rkeys = 0;
+
+ max = min_t(u8, llcv2->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
+ for (i = 0; i < max; i++) {
+ if (smc_rtoken_delete(link, llcv2->rkey[i]))
+ llcv2->num_inval_rkeys++;
+ }
+ memset(&llc->rkey[0], 0, sizeof(llc->rkey));
+ memset(&llc->reserved2, 0, sizeof(llc->reserved2));
+ smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
+ if (llcv2->num_inval_rkeys) {
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->err_mask = llcv2->num_inval_rkeys;
+ }
+ goto finish;
+ }
+
max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
for (i = 0; i < max; i++) {
if (smc_rtoken_delete(link, llc->rkey[i]))
@@ -1511,6 +1808,7 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
llc->err_mask = err_mask;
}
+finish:
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, &qentry->msg);
smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
@@ -1546,7 +1844,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
if (!smc_link_usable(link))
goto out;
- switch (llc->raw.hdr.common.type) {
+ switch (llc->raw.hdr.common.llc_type) {
case SMC_LLC_TEST_LINK:
llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc);
@@ -1571,8 +1869,18 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
qentry);
wake_up(&lgr->llc_msg_waiter);
- } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
- qentry)) {
+ return;
+ }
+ if (lgr->llc_flow_lcl.type ==
+ SMC_LLC_FLOW_REQ_ADD_LINK) {
+ /* server started add_link processing */
+ lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK;
+ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
+ qentry);
+ schedule_work(&lgr->llc_add_link_work);
+ return;
+ }
+ if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
schedule_work(&lgr->llc_add_link_work);
}
} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
@@ -1620,6 +1928,23 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
}
return;
+ case SMC_LLC_REQ_ADD_LINK:
+ /* handle the response here, since smc_llc_flow_stop() cannot be
+ * called in tasklet context
+ */
+ if (lgr->role == SMC_CLNT &&
+ lgr->llc_flow_lcl.type == SMC_LLC_FLOW_REQ_ADD_LINK &&
+ (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP)) {
+ smc_llc_flow_stop(link->lgr, &lgr->llc_flow_lcl);
+ } else if (lgr->role == SMC_SERV) {
+ if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
+ /* as smc server, handle client suggestion */
+ lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK;
+ schedule_work(&lgr->llc_add_link_work);
+ }
+ return;
+ }
+ break;
default:
smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type);
break;
@@ -1663,7 +1988,7 @@ static void smc_llc_rx_response(struct smc_link *link,
{
enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type;
struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl;
- u8 llc_type = qentry->msg.raw.hdr.common.type;
+ u8 llc_type = qentry->msg.raw.hdr.common.llc_type;
switch (llc_type) {
case SMC_LLC_TEST_LINK:
@@ -1689,7 +2014,8 @@ static void smc_llc_rx_response(struct smc_link *link,
/* not used because max links is 3 */
break;
default:
- smc_llc_protocol_violation(link->lgr, llc_type);
+ smc_llc_protocol_violation(link->lgr,
+ qentry->msg.raw.hdr.common.type);
break;
}
kfree(qentry);
@@ -1714,7 +2040,8 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));
/* process responses immediately */
- if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
+ if ((llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) &&
+ llc->raw.hdr.common.llc_type != SMC_LLC_REQ_ADD_LINK) {
smc_llc_rx_response(link, qentry);
return;
}
@@ -1734,8 +2061,13 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
if (wc->byte_len < sizeof(*llc))
return; /* short message */
- if (llc->raw.hdr.length != sizeof(*llc))
- return; /* invalid message */
+ if (!llc->raw.hdr.common.llc_version) {
+ if (llc->raw.hdr.length != sizeof(*llc))
+ return; /* invalid message */
+ } else {
+ if (llc->raw.hdr.length_v2 < sizeof(*llc))
+ return; /* invalid message */
+ }
smc_llc_enqueue(link, llc);
}
@@ -1954,6 +2286,35 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
.handler = smc_llc_rx_handler,
.type = SMC_LLC_DELETE_RKEY
},
+ /* V2 types */
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_LINK_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_TEST_LINK_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_ADD_LINK_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_LINK_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_REQ_ADD_LINK_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY_V2
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_RKEY_V2
+ },
{
.handler = NULL,
}
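The reworked smc_llc_hdr overlays the one-byte v1 length (always 44) with a 16-bit length_v2 for SMC-Rv2 messages, and smc_llc_init_msg_hdr() selects the field from the link group's SMC version. Below is a simplified, hypothetical model of that header and of the selection logic; the real common header is struct smc_wr_rx_hdr and its exact layout differs.

#include <stdint.h>
#include <stdio.h>

struct llc_hdr {			/* simplified model only */
	uint8_t type;			/* stands in for the smc_wr_rx_hdr type byte */
	uint8_t version;		/* 0 for v1 messages, 2 for SMC-Rv2 */
	union {
		uint8_t  length;	/* v1: always 44 */
		uint16_t length_v2;	/* v2: 44 - 8192 */
	};
	uint8_t flags;
} __attribute__((packed));

static void init_hdr(struct llc_hdr *hdr, int smc_version, size_t len)
{
	if (smc_version == 2) {		/* SMC_V2 */
		hdr->version = 2;
		hdr->length_v2 = (uint16_t)len;
	} else {
		hdr->version = 0;
		hdr->length = (uint8_t)len;
	}
}

int main(void)
{
	struct llc_hdr h = { .type = 0x22 };	/* SMC_LLC_ADD_LINK_V2 */

	init_hdr(&h, 2, 1032);			/* e.g. add_link plus rkey extension */
	printf("v2 length %u, model header size %zu\n", h.length_v2, sizeof(h));
	return 0;
}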
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index cc00a2ec4e92..4404e52b3346 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -30,10 +30,19 @@ enum smc_llc_msg_type {
SMC_LLC_ADD_LINK = 0x02,
SMC_LLC_ADD_LINK_CONT = 0x03,
SMC_LLC_DELETE_LINK = 0x04,
+ SMC_LLC_REQ_ADD_LINK = 0x05,
SMC_LLC_CONFIRM_RKEY = 0x06,
SMC_LLC_TEST_LINK = 0x07,
SMC_LLC_CONFIRM_RKEY_CONT = 0x08,
SMC_LLC_DELETE_RKEY = 0x09,
+ /* V2 types */
+ SMC_LLC_CONFIRM_LINK_V2 = 0x21,
+ SMC_LLC_ADD_LINK_V2 = 0x22,
+ SMC_LLC_DELETE_LINK_V2 = 0x24,
+ SMC_LLC_REQ_ADD_LINK_V2 = 0x25,
+ SMC_LLC_CONFIRM_RKEY_V2 = 0x26,
+ SMC_LLC_TEST_LINK_V2 = 0x27,
+ SMC_LLC_DELETE_RKEY_V2 = 0x29,
};
#define smc_link_downing(state) \
@@ -102,7 +111,8 @@ void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord,
u32 rsn);
int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry);
-int smc_llc_srv_add_link(struct smc_link *link);
+int smc_llc_srv_add_link(struct smc_link *link,
+ struct smc_llc_qentry *req_qentry);
void smc_llc_add_link_local(struct smc_link *link);
int smc_llc_init(void) __init;
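The new enum values follow a simple convention: each SMC_LLC_*_V2 type listed above is its v1 counterpart plus 0x20, with SMC_LLC_REQ_ADD_LINK itself being new at 0x05/0x25. A tiny hypothetical helper, not part of the patch, spells out this offset:

/* hypothetical, illustrates the 0x20 offset between the v1 and v2 LLC types */
#define SMC_LLC_V2_TYPE_OFFSET	0x20

static inline unsigned char smc_llc_type_to_v2(unsigned char v1_type)
{
	return v1_type + SMC_LLC_V2_TYPE_OFFSET; /* e.g. 0x02 ADD_LINK -> 0x22 ADD_LINK_V2 */
}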
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 4a964e9190b0..67e9d9fde085 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -953,6 +953,26 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
return rc;
}
+static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i,
+ struct smc_init_info *ini)
+{
+ if (!ini->check_smcrv2 &&
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL,
+ NULL)) {
+ ini->ib_dev = ibdev;
+ ini->ib_port = i;
+ return 0;
+ }
+ if (ini->check_smcrv2 &&
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->smcrv2.ib_gid_v2,
+ NULL, &ini->smcrv2)) {
+ ini->smcrv2.ib_dev_v2 = ibdev;
+ ini->smcrv2.ib_port_v2 = i;
+ return 0;
+ }
+ return -ENODEV;
+}
+
/* find a roce device for the given pnetid */
static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
struct smc_init_info *ini,
@@ -961,7 +981,6 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
struct smc_ib_device *ibdev;
int i;
- ini->ib_dev = NULL;
mutex_lock(&smc_ib_devices.mutex);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
if (ibdev == known_dev)
@@ -971,12 +990,9 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
continue;
if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) &&
smc_ib_port_active(ibdev, i) &&
- !test_bit(i - 1, ibdev->ports_going_away) &&
- !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
- ini->ib_gid, NULL)) {
- ini->ib_dev = ibdev;
- ini->ib_port = i;
- goto out;
+ !test_bit(i - 1, ibdev->ports_going_away)) {
+ if (!smc_pnet_determine_gid(ibdev, i, ini))
+ goto out;
}
}
}
@@ -1016,12 +1032,9 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
dev_put(ndev);
if (netdev == ndev &&
smc_ib_port_active(ibdev, i) &&
- !test_bit(i - 1, ibdev->ports_going_away) &&
- !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
- ini->ib_gid, NULL)) {
- ini->ib_dev = ibdev;
- ini->ib_port = i;
- break;
+ !test_bit(i - 1, ibdev->ports_going_away)) {
+ if (!smc_pnet_determine_gid(ibdev, i, ini))
+ break;
}
}
}
@@ -1083,8 +1096,6 @@ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(sk);
- ini->ib_dev = NULL;
- ini->ib_port = 0;
if (!dst)
goto out;
if (!dst->dev)
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index a419e9af36b9..600ab5889227 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -101,19 +101,32 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
}
pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
- if (pnd_snd_idx == link->wr_tx_cnt)
- return;
- link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
- if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
- complete(&link->wr_tx_compl[pnd_snd_idx]);
- memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
- /* clear the full struct smc_wr_tx_pend including .priv */
- memset(&link->wr_tx_pends[pnd_snd_idx], 0,
- sizeof(link->wr_tx_pends[pnd_snd_idx]));
- memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
- sizeof(link->wr_tx_bufs[pnd_snd_idx]));
- if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
- return;
+ if (pnd_snd_idx == link->wr_tx_cnt) {
+ if (link->lgr->smc_version != SMC_V2 ||
+ link->wr_tx_v2_pend->wr_id != wc->wr_id)
+ return;
+ link->wr_tx_v2_pend->wc_status = wc->status;
+ memcpy(&pnd_snd, link->wr_tx_v2_pend, sizeof(pnd_snd));
+ /* clear the full struct smc_wr_tx_pend including .priv */
+ memset(link->wr_tx_v2_pend, 0,
+ sizeof(*link->wr_tx_v2_pend));
+ memset(link->lgr->wr_tx_buf_v2, 0,
+ sizeof(*link->lgr->wr_tx_buf_v2));
+ } else {
+ link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
+ if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
+ complete(&link->wr_tx_compl[pnd_snd_idx]);
+ memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx],
+ sizeof(pnd_snd));
+ /* clear the full struct smc_wr_tx_pend including .priv */
+ memset(&link->wr_tx_pends[pnd_snd_idx], 0,
+ sizeof(link->wr_tx_pends[pnd_snd_idx]));
+ memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
+ sizeof(link->wr_tx_bufs[pnd_snd_idx]));
+ if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
+ return;
+ }
+
if (wc->status) {
for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
/* clear full struct smc_wr_tx_pend including .priv */
@@ -123,6 +136,12 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
sizeof(link->wr_tx_bufs[i]));
clear_bit(i, link->wr_tx_mask);
}
+ if (link->lgr->smc_version == SMC_V2) {
+ memset(link->wr_tx_v2_pend, 0,
+ sizeof(*link->wr_tx_v2_pend));
+ memset(link->lgr->wr_tx_buf_v2, 0,
+ sizeof(*link->lgr->wr_tx_buf_v2));
+ }
/* terminate link */
smcr_link_down_cond_sched(link);
}
@@ -239,6 +258,33 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
return 0;
}
+int smc_wr_tx_get_v2_slot(struct smc_link *link,
+ smc_wr_tx_handler handler,
+ struct smc_wr_v2_buf **wr_buf,
+ struct smc_wr_tx_pend_priv **wr_pend_priv)
+{
+ struct smc_wr_tx_pend *wr_pend;
+ struct ib_send_wr *wr_ib;
+ u64 wr_id;
+
+ if (link->wr_tx_v2_pend->idx == link->wr_tx_cnt)
+ return -EBUSY;
+
+ *wr_buf = NULL;
+ *wr_pend_priv = NULL;
+ wr_id = smc_wr_tx_get_next_wr_id(link);
+ wr_pend = link->wr_tx_v2_pend;
+ wr_pend->wr_id = wr_id;
+ wr_pend->handler = handler;
+ wr_pend->link = link;
+ wr_pend->idx = link->wr_tx_cnt;
+ wr_ib = link->wr_tx_v2_ib;
+ wr_ib->wr_id = wr_id;
+ *wr_buf = link->lgr->wr_tx_buf_v2;
+ *wr_pend_priv = &wr_pend->priv;
+ return 0;
+}
+
int smc_wr_tx_put_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv)
{
@@ -256,6 +302,14 @@ int smc_wr_tx_put_slot(struct smc_link *link,
test_and_clear_bit(idx, link->wr_tx_mask);
wake_up(&link->wr_tx_wait);
return 1;
+ } else if (link->lgr->smc_version == SMC_V2 &&
+ pend->idx == link->wr_tx_cnt) {
+ /* Large v2 buffer */
+ memset(&link->wr_tx_v2_pend, 0,
+ sizeof(link->wr_tx_v2_pend));
+ memset(&link->lgr->wr_tx_buf_v2, 0,
+ sizeof(link->lgr->wr_tx_buf_v2));
+ return 1;
}
return 0;
@@ -280,6 +334,22 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
return rc;
}
+int smc_wr_tx_v2_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
+ int len)
+{
+ int rc;
+
+ link->wr_tx_v2_ib->sg_list[0].length = len;
+ ib_req_notify_cq(link->smcibdev->roce_cq_send,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+ rc = ib_post_send(link->roce_qp, link->wr_tx_v2_ib, NULL);
+ if (rc) {
+ smc_wr_tx_put_slot(link, priv);
+ smcr_link_down_cond_sched(link);
+ }
+ return rc;
+}
+
/* Send prepared WR slot via ib_post_send and wait for send completion
* notification.
* @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
@@ -517,6 +587,7 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk)
static void smc_wr_init_sge(struct smc_link *lnk)
{
+ int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
u32 i;
for (i = 0; i < lnk->wr_tx_cnt; i++) {
@@ -545,14 +616,44 @@ static void smc_wr_init_sge(struct smc_link *lnk)
lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
}
+
+ if (lnk->lgr->smc_version == SMC_V2) {
+ lnk->wr_tx_v2_sge->addr = lnk->wr_tx_v2_dma_addr;
+ lnk->wr_tx_v2_sge->length = SMC_WR_BUF_V2_SIZE;
+ lnk->wr_tx_v2_sge->lkey = lnk->roce_pd->local_dma_lkey;
+
+ lnk->wr_tx_v2_ib->next = NULL;
+ lnk->wr_tx_v2_ib->sg_list = lnk->wr_tx_v2_sge;
+ lnk->wr_tx_v2_ib->num_sge = 1;
+ lnk->wr_tx_v2_ib->opcode = IB_WR_SEND;
+ lnk->wr_tx_v2_ib->send_flags =
+ IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ }
+
+ /* With SMC-Rv2 there can be messages larger than SMC_WR_TX_SIZE.
+ * Each ib_recv_wr gets 2 sges; the second one is a spillover buffer
+ * shared by all receive WRs. When a larger message arrives, the
+ * content of the first small sge is copied to the beginning of
+ * the larger spillover buffer, allowing easy data mapping.
+ */
for (i = 0; i < lnk->wr_rx_cnt; i++) {
- lnk->wr_rx_sges[i].addr =
+ int x = i * sges_per_buf;
+
+ lnk->wr_rx_sges[x].addr =
lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
- lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE;
- lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
+ lnk->wr_rx_sges[x].length = SMC_WR_TX_SIZE;
+ lnk->wr_rx_sges[x].lkey = lnk->roce_pd->local_dma_lkey;
+ if (lnk->lgr->smc_version == SMC_V2) {
+ lnk->wr_rx_sges[x + 1].addr =
+ lnk->wr_rx_v2_dma_addr + SMC_WR_TX_SIZE;
+ lnk->wr_rx_sges[x + 1].length =
+ SMC_WR_BUF_V2_SIZE - SMC_WR_TX_SIZE;
+ lnk->wr_rx_sges[x + 1].lkey =
+ lnk->roce_pd->local_dma_lkey;
+ }
lnk->wr_rx_ibs[i].next = NULL;
- lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
- lnk->wr_rx_ibs[i].num_sge = 1;
+ lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[x];
+ lnk->wr_rx_ibs[i].num_sge = sges_per_buf;
}
lnk->wr_reg.wr.next = NULL;
lnk->wr_reg.wr.num_sge = 0;
@@ -585,16 +686,45 @@ void smc_wr_free_link(struct smc_link *lnk)
DMA_FROM_DEVICE);
lnk->wr_rx_dma_addr = 0;
}
+ if (lnk->wr_rx_v2_dma_addr) {
+ ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr,
+ SMC_WR_BUF_V2_SIZE,
+ DMA_FROM_DEVICE);
+ lnk->wr_rx_v2_dma_addr = 0;
+ }
if (lnk->wr_tx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
DMA_TO_DEVICE);
lnk->wr_tx_dma_addr = 0;
}
+ if (lnk->wr_tx_v2_dma_addr) {
+ ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr,
+ SMC_WR_BUF_V2_SIZE,
+ DMA_TO_DEVICE);
+ lnk->wr_tx_v2_dma_addr = 0;
+ }
+}
+
+void smc_wr_free_lgr_mem(struct smc_link_group *lgr)
+{
+ if (lgr->smc_version < SMC_V2)
+ return;
+
+ kfree(lgr->wr_rx_buf_v2);
+ lgr->wr_rx_buf_v2 = NULL;
+ kfree(lgr->wr_tx_buf_v2);
+ lgr->wr_tx_buf_v2 = NULL;
}
void smc_wr_free_link_mem(struct smc_link *lnk)
{
+ kfree(lnk->wr_tx_v2_ib);
+ lnk->wr_tx_v2_ib = NULL;
+ kfree(lnk->wr_tx_v2_sge);
+ lnk->wr_tx_v2_sge = NULL;
+ kfree(lnk->wr_tx_v2_pend);
+ lnk->wr_tx_v2_pend = NULL;
kfree(lnk->wr_tx_compl);
lnk->wr_tx_compl = NULL;
kfree(lnk->wr_tx_pends);
@@ -619,8 +749,26 @@ void smc_wr_free_link_mem(struct smc_link *lnk)
lnk->wr_rx_bufs = NULL;
}
+int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr)
+{
+ if (lgr->smc_version < SMC_V2)
+ return 0;
+
+ lgr->wr_rx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL);
+ if (!lgr->wr_rx_buf_v2)
+ return -ENOMEM;
+ lgr->wr_tx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL);
+ if (!lgr->wr_tx_buf_v2) {
+ kfree(lgr->wr_rx_buf_v2);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
int smc_wr_alloc_link_mem(struct smc_link *link)
{
+ int sges_per_buf = link->lgr->smc_version == SMC_V2 ? 2 : 1;
+
/* allocate link related memory */
link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
if (!link->wr_tx_bufs)
@@ -653,7 +801,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
if (!link->wr_tx_sges)
goto no_mem_wr_tx_rdma_sges;
link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
- sizeof(link->wr_rx_sges[0]),
+ sizeof(link->wr_rx_sges[0]) * sges_per_buf,
GFP_KERNEL);
if (!link->wr_rx_sges)
goto no_mem_wr_tx_sges;
@@ -672,8 +820,29 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
GFP_KERNEL);
if (!link->wr_tx_compl)
goto no_mem_wr_tx_pends;
+
+ if (link->lgr->smc_version == SMC_V2) {
+ link->wr_tx_v2_ib = kzalloc(sizeof(*link->wr_tx_v2_ib),
+ GFP_KERNEL);
+ if (!link->wr_tx_v2_ib)
+ goto no_mem_tx_compl;
+ link->wr_tx_v2_sge = kzalloc(sizeof(*link->wr_tx_v2_sge),
+ GFP_KERNEL);
+ if (!link->wr_tx_v2_sge)
+ goto no_mem_v2_ib;
+ link->wr_tx_v2_pend = kzalloc(sizeof(*link->wr_tx_v2_pend),
+ GFP_KERNEL);
+ if (!link->wr_tx_v2_pend)
+ goto no_mem_v2_sge;
+ }
return 0;
+no_mem_v2_sge:
+ kfree(link->wr_tx_v2_sge);
+no_mem_v2_ib:
+ kfree(link->wr_tx_v2_ib);
+no_mem_tx_compl:
+ kfree(link->wr_tx_compl);
no_mem_wr_tx_pends:
kfree(link->wr_tx_pends);
no_mem_wr_tx_mask:
@@ -725,6 +894,24 @@ int smc_wr_create_link(struct smc_link *lnk)
rc = -EIO;
goto out;
}
+ if (lnk->lgr->smc_version == SMC_V2) {
+ lnk->wr_rx_v2_dma_addr = ib_dma_map_single(ibdev,
+ lnk->lgr->wr_rx_buf_v2, SMC_WR_BUF_V2_SIZE,
+ DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) {
+ lnk->wr_rx_v2_dma_addr = 0;
+ rc = -EIO;
+ goto dma_unmap;
+ }
+ lnk->wr_tx_v2_dma_addr = ib_dma_map_single(ibdev,
+ lnk->lgr->wr_tx_buf_v2, SMC_WR_BUF_V2_SIZE,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(ibdev, lnk->wr_tx_v2_dma_addr)) {
+ lnk->wr_tx_v2_dma_addr = 0;
+ rc = -EIO;
+ goto dma_unmap;
+ }
+ }
lnk->wr_tx_dma_addr = ib_dma_map_single(
ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
DMA_TO_DEVICE);
@@ -742,6 +929,18 @@ int smc_wr_create_link(struct smc_link *lnk)
return rc;
dma_unmap:
+ if (lnk->wr_rx_v2_dma_addr) {
+ ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr,
+ SMC_WR_BUF_V2_SIZE,
+ DMA_FROM_DEVICE);
+ lnk->wr_rx_v2_dma_addr = 0;
+ }
+ if (lnk->wr_tx_v2_dma_addr) {
+ ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr,
+ SMC_WR_BUF_V2_SIZE,
+ DMA_TO_DEVICE);
+ lnk->wr_tx_v2_dma_addr = 0;
+ }
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
DMA_FROM_DEVICE);
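With SMC-Rv2, smc_wr_init_sge() gives every receive WR two SGEs: SGE 0 covers the WR's own small buffer, while SGE 1 covers the single shared spillover buffer starting SMC_WR_TX_SIZE bytes in, so the small part can later be copied in front of the spilled data to form one contiguous message. A rough userspace model of the resulting layout follows; the buffer sizes and addresses are assumptions for illustration, not values taken from the headers.

#include <stdio.h>

#define SMC_WR_BUF_SIZE		48	/* assumed: per-WR v1 buffer stride */
#define SMC_WR_TX_SIZE		44	/* assumed: usable v1 message size */
#define SMC_WR_BUF_V2_SIZE	8192	/* assumed: shared v2 buffer size */

int main(void)
{
	unsigned long rx_base = 0x1000;		/* stand-in for wr_rx_dma_addr */
	unsigned long rx_v2_base = 0x9000;	/* stand-in for wr_rx_v2_dma_addr */

	for (int i = 0; i < 3; i++) {
		/* sge 0: this WR's private small buffer */
		unsigned long sge0 = rx_base + i * SMC_WR_BUF_SIZE;
		/* sge 1: the shared spillover buffer, starting SMC_WR_TX_SIZE
		 * bytes in, so the part received into sge 0 can later be
		 * copied in front of it
		 */
		unsigned long sge1 = rx_v2_base + SMC_WR_TX_SIZE;

		printf("wr %d: sge0=%#lx/%d sge1=%#lx/%d\n", i,
		       sge0, SMC_WR_TX_SIZE,
		       sge1, SMC_WR_BUF_V2_SIZE - SMC_WR_TX_SIZE);
	}
	return 0;
}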
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 2bc626f230a5..f353311e6f84 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -101,8 +101,10 @@ static inline int smc_wr_rx_post(struct smc_link *link)
int smc_wr_create_link(struct smc_link *lnk);
int smc_wr_alloc_link_mem(struct smc_link *lnk);
+int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr);
void smc_wr_free_link(struct smc_link *lnk);
void smc_wr_free_link_mem(struct smc_link *lnk);
+void smc_wr_free_lgr_mem(struct smc_link_group *lgr);
void smc_wr_remember_qp_attr(struct smc_link *lnk);
void smc_wr_remove_dev(struct smc_ib_device *smcibdev);
void smc_wr_add_dev(struct smc_ib_device *smcibdev);
@@ -111,10 +113,16 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wrs,
struct smc_wr_tx_pend_priv **wr_pend_priv);
+int smc_wr_tx_get_v2_slot(struct smc_link *link,
+ smc_wr_tx_handler handler,
+ struct smc_wr_v2_buf **wr_buf,
+ struct smc_wr_tx_pend_priv **wr_pend_priv);
int smc_wr_tx_put_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
+int smc_wr_tx_v2_send(struct smc_link *link,
+ struct smc_wr_tx_pend_priv *priv, int len);
int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
unsigned long timeout);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);