diff options
Diffstat (limited to 'drivers/infiniband/ulp/iser')
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.c | 104 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.h | 30 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_initiator.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_memory.c | 102 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_verbs.c | 91 |
5 files changed, 193 insertions, 140 deletions
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 20ca6a619476..6a594aac2290 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -97,7 +97,7 @@ module_param_named(pi_enable, iser_pi_enable, bool, 0644); MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)"); module_param_named(pi_guard, iser_pi_guard, int, 0644); -MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:IP_CSUM)"); +MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]"); static struct workqueue_struct *release_wq; struct iser_global ig; @@ -164,18 +164,42 @@ iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) return 0; } -int iser_initialize_task_headers(struct iscsi_task *task, - struct iser_tx_desc *tx_desc) +/** + * iser_initialize_task_headers() - Initialize task headers + * @task: iscsi task + * @tx_desc: iser tx descriptor + * + * Notes: + * This routine may race with iser teardown flow for scsi + * error handling TMFs. So for TMF we should acquire the + * state mutex to avoid dereferencing the IB device which + * may have already been terminated. + */ +int +iser_initialize_task_headers(struct iscsi_task *task, + struct iser_tx_desc *tx_desc) { - struct iser_conn *iser_conn = task->conn->dd_data; + struct iser_conn *iser_conn = task->conn->dd_data; struct iser_device *device = iser_conn->ib_conn.device; struct iscsi_iser_task *iser_task = task->dd_data; u64 dma_addr; + const bool mgmt_task = !task->sc && !in_interrupt(); + int ret = 0; + + if (unlikely(mgmt_task)) + mutex_lock(&iser_conn->state_mutex); + + if (unlikely(iser_conn->state != ISER_CONN_UP)) { + ret = -ENODEV; + goto out; + } dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc, ISER_HEADERS_LEN, DMA_TO_DEVICE); - if (ib_dma_mapping_error(device->ib_device, dma_addr)) - return -ENOMEM; + if (ib_dma_mapping_error(device->ib_device, dma_addr)) { + ret = -ENOMEM; + goto out; + } tx_desc->dma_addr = dma_addr; tx_desc->tx_sg[0].addr = tx_desc->dma_addr; @@ -183,7 +207,11 @@ int iser_initialize_task_headers(struct iscsi_task *task, tx_desc->tx_sg[0].lkey = device->mr->lkey; iser_task->iser_conn = iser_conn; - return 0; +out: + if (unlikely(mgmt_task)) + mutex_unlock(&iser_conn->state_mutex); + + return ret; } /** @@ -199,9 +227,14 @@ static int iscsi_iser_task_init(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; + int ret; - if (iser_initialize_task_headers(task, &iser_task->desc)) - return -ENOMEM; + ret = iser_initialize_task_headers(task, &iser_task->desc); + if (ret) { + iser_err("Failed to init task %p, err = %d\n", + iser_task, ret); + return ret; + } /* mgmt task */ if (!task->sc) @@ -508,8 +541,8 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) */ if (iser_conn) { mutex_lock(&iser_conn->state_mutex); - iscsi_conn_stop(cls_conn, flag); iser_conn_terminate(iser_conn); + iscsi_conn_stop(cls_conn, flag); /* unbind */ iser_conn->iscsi_conn = NULL; @@ -541,12 +574,13 @@ iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) static inline unsigned int iser_dif_prot_caps(int prot_caps) { - return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION | - SHOST_DIX_TYPE1_PROTECTION : 0) | - ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION | - SHOST_DIX_TYPE2_PROTECTION : 0) | - ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION | - SHOST_DIX_TYPE3_PROTECTION : 0); + return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? + SHOST_DIF_TYPE1_PROTECTION | SHOST_DIX_TYPE0_PROTECTION | + SHOST_DIX_TYPE1_PROTECTION : 0) | + ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? + SHOST_DIF_TYPE2_PROTECTION | SHOST_DIX_TYPE2_PROTECTION : 0) | + ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? + SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE3_PROTECTION : 0); } /** @@ -569,6 +603,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, struct Scsi_Host *shost; struct iser_conn *iser_conn = NULL; struct ib_conn *ib_conn; + u16 max_cmds; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); if (!shost) @@ -586,26 +621,41 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, */ if (ep) { iser_conn = ep->dd_data; + max_cmds = iser_conn->max_cmds; + + mutex_lock(&iser_conn->state_mutex); + if (iser_conn->state != ISER_CONN_UP) { + iser_err("iser conn %p already started teardown\n", + iser_conn); + mutex_unlock(&iser_conn->state_mutex); + goto free_host; + } + ib_conn = &iser_conn->ib_conn; if (ib_conn->pi_support) { u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap; scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); - if (iser_pi_guard) - scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP); - else - scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC); + scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP | + SHOST_DIX_GUARD_CRC); } - } - if (iscsi_host_add(shost, ep ? - ib_conn->device->ib_device->dma_device : NULL)) - goto free_host; + if (iscsi_host_add(shost, + ib_conn->device->ib_device->dma_device)) { + mutex_unlock(&iser_conn->state_mutex); + goto free_host; + } + mutex_unlock(&iser_conn->state_mutex); + } else { + max_cmds = ISER_DEF_XMIT_CMDS_MAX; + if (iscsi_host_add(shost, NULL)) + goto free_host; + } - if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) { + if (cmds_max > max_cmds) { iser_info("cmds_max changed from %u to %u\n", - cmds_max, ISER_DEF_XMIT_CMDS_MAX); - cmds_max = ISER_DEF_XMIT_CMDS_MAX; + cmds_max, max_cmds); + cmds_max = max_cmds; } cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index cd4174ca9a76..5ce26817e7e1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -69,34 +69,31 @@ #define DRV_NAME "iser" #define PFX DRV_NAME ": " -#define DRV_VER "1.4.8" +#define DRV_VER "1.5" #define iser_dbg(fmt, arg...) \ do { \ - if (iser_debug_level > 2) \ + if (unlikely(iser_debug_level > 2)) \ printk(KERN_DEBUG PFX "%s: " fmt,\ __func__ , ## arg); \ } while (0) #define iser_warn(fmt, arg...) \ do { \ - if (iser_debug_level > 0) \ + if (unlikely(iser_debug_level > 0)) \ pr_warn(PFX "%s: " fmt, \ __func__ , ## arg); \ } while (0) #define iser_info(fmt, arg...) \ do { \ - if (iser_debug_level > 1) \ + if (unlikely(iser_debug_level > 1)) \ pr_info(PFX "%s: " fmt, \ __func__ , ## arg); \ } while (0) -#define iser_err(fmt, arg...) \ - do { \ - printk(KERN_ERR PFX "%s: " fmt, \ - __func__ , ## arg); \ - } while (0) +#define iser_err(fmt, arg...) \ + pr_err(PFX "%s: " fmt, __func__ , ## arg) #define SHIFT_4K 12 #define SIZE_4K (1ULL << SHIFT_4K) @@ -144,6 +141,11 @@ ISER_MAX_TX_MISC_PDUS + \ ISER_MAX_RX_MISC_PDUS) +#define ISER_GET_MAX_XMIT_CMDS(send_wr) ((send_wr \ + - ISER_MAX_TX_MISC_PDUS \ + - ISER_MAX_RX_MISC_PDUS) / \ + (1 + ISER_INFLIGHT_DATAOUTS)) + #define ISER_WC_BATCH_COUNT 16 #define ISER_SIGNAL_CMD_COUNT 32 @@ -247,7 +249,6 @@ struct iscsi_endpoint; * @va: MR start address (buffer va) * @len: MR length * @mem_h: pointer to registration context (FMR/Fastreg) - * @is_mr: indicates weather we registered the buffer */ struct iser_mem_reg { u32 lkey; @@ -255,7 +256,6 @@ struct iser_mem_reg { u64 va; u64 len; void *mem_h; - int is_mr; }; /** @@ -323,8 +323,6 @@ struct iser_rx_desc { char pad[ISER_RX_PAD_SIZE]; } __attribute__((packed)); -#define ISER_MAX_CQ 4 - struct iser_conn; struct ib_conn; struct iscsi_iser_task; @@ -375,7 +373,7 @@ struct iser_device { struct list_head ig_list; int refcount; int comps_used; - struct iser_comp comps[ISER_MAX_CQ]; + struct iser_comp *comps; int (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn, unsigned cmds_max); void (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn); @@ -432,6 +430,7 @@ struct fast_reg_descriptor { * @cma_id: rdma_cm connection maneger handle * @qp: Connection Queue-pair * @post_recv_buf_count: post receive counter + * @sig_count: send work request signal count * @rx_wr: receive work request for batch posts * @device: reference to iser device * @comp: iser completion context @@ -452,6 +451,7 @@ struct ib_conn { struct rdma_cm_id *cma_id; struct ib_qp *qp; int post_recv_buf_count; + u8 sig_count; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; struct iser_device *device; struct iser_comp *comp; @@ -482,6 +482,7 @@ struct ib_conn { * to max number of post recvs * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1) * @min_posted_rx: (qp_max_recv_dtos >> 2) + * @max_cmds: maximum cmds allowed for this connection * @name: connection peer portal * @release_work: deffered work for release job * @state_mutex: protects iser onnection state @@ -507,6 +508,7 @@ struct iser_conn { unsigned qp_max_recv_dtos; unsigned qp_max_recv_dtos_mask; unsigned min_posted_rx; + u16 max_cmds; char name[ISER_OBJECT_NAME_SIZE]; struct work_struct release_work; struct mutex state_mutex; diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 5a489ea63732..3821633f1065 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -369,7 +369,7 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) return 0; } -static inline bool iser_signal_comp(int sig_count) +static inline bool iser_signal_comp(u8 sig_count) { return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0); } @@ -388,7 +388,7 @@ int iser_send_command(struct iscsi_conn *conn, struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr; struct scsi_cmnd *sc = task->sc; struct iser_tx_desc *tx_desc = &iser_task->desc; - static unsigned sig_count; + u8 sig_count = ++iser_conn->ib_conn.sig_count; edtl = ntohl(hdr->data_length); @@ -435,7 +435,7 @@ int iser_send_command(struct iscsi_conn *conn, iser_task->status = ISER_TASK_STATUS_STARTED; err = iser_post_send(&iser_conn->ib_conn, tx_desc, - iser_signal_comp(++sig_count)); + iser_signal_comp(sig_count)); if (!err) return 0; diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 6c5ce357fba6..abce9339333f 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -73,7 +73,6 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, if (cmd_dir == ISER_DIR_OUT) { /* copy the unaligned sg the buffer which is used for RDMA */ - int i; char *p, *from; sgl = (struct scatterlist *)data->buf; @@ -409,7 +408,6 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, regd_buf->reg.rkey = device->mr->rkey; regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]); regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]); - regd_buf->reg.is_mr = 0; iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X " "va: 0x%08lX sz: %ld]\n", @@ -440,13 +438,13 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, return 0; } -static inline void +static void iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs, struct ib_sig_domain *domain) { domain->sig_type = IB_SIG_TYPE_T10_DIF; - domain->sig.dif.pi_interval = sc->device->sector_size; - domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff; + domain->sig.dif.pi_interval = scsi_prot_interval(sc); + domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc); /* * At the moment we hard code those, but in the future * we will take them from sc. @@ -454,8 +452,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs, domain->sig.dif.apptag_check_mask = 0xffff; domain->sig.dif.app_escape = true; domain->sig.dif.ref_escape = true; - if (scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE1 || - scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE2) + if (sc->prot_flags & SCSI_PROT_REF_INCREMENT) domain->sig.dif.ref_remap = true; }; @@ -473,26 +470,16 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) case SCSI_PROT_WRITE_STRIP: sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem); - /* - * At the moment we use this modparam to tell what is - * the memory bg_type, in the future we will take it - * from sc. - */ - sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM : - IB_T10DIF_CRC; + sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ? + IB_T10DIF_CSUM : IB_T10DIF_CRC; break; case SCSI_PROT_READ_PASS: case SCSI_PROT_WRITE_PASS: iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire); sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem); - /* - * At the moment we use this modparam to tell what is - * the memory bg_type, in the future we will take it - * from sc. - */ - sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM : - IB_T10DIF_CRC; + sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ? + IB_T10DIF_CSUM : IB_T10DIF_CRC; break; default: iser_err("Unsupported PI operation %d\n", @@ -503,26 +490,28 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) return 0; } -static int +static inline void iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) { - switch (scsi_get_prot_type(sc)) { - case SCSI_PROT_DIF_TYPE0: - break; - case SCSI_PROT_DIF_TYPE1: - case SCSI_PROT_DIF_TYPE2: - *mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG; - break; - case SCSI_PROT_DIF_TYPE3: - *mask = ISER_CHECK_GUARD; - break; - default: - iser_err("Unsupported protection type %d\n", - scsi_get_prot_type(sc)); - return -EINVAL; - } + *mask = 0; + if (sc->prot_flags & SCSI_PROT_REF_CHECK) + *mask |= ISER_CHECK_REFTAG; + if (sc->prot_flags & SCSI_PROT_GUARD_CHECK) + *mask |= ISER_CHECK_GUARD; +} - return 0; +static void +iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) +{ + u32 rkey; + + memset(inv_wr, 0, sizeof(*inv_wr)); + inv_wr->opcode = IB_WR_LOCAL_INV; + inv_wr->wr_id = ISER_FASTREG_LI_WRID; + inv_wr->ex.invalidate_rkey = mr->rkey; + + rkey = ib_inc_rkey(mr->rkey); + ib_update_fast_reg_key(mr, rkey); } static int @@ -536,26 +525,17 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, struct ib_send_wr *bad_wr, *wr = NULL; struct ib_sig_attrs sig_attrs; int ret; - u32 key; memset(&sig_attrs, 0, sizeof(sig_attrs)); ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs); if (ret) goto err; - ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask); - if (ret) - goto err; + iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask); if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) { - memset(&inv_wr, 0, sizeof(inv_wr)); - inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.wr_id = ISER_FASTREG_LI_WRID; - inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey; + iser_inv_rkey(&inv_wr, pi_ctx->sig_mr); wr = &inv_wr; - /* Bump the key */ - key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF); - ib_update_fast_reg_key(pi_ctx->sig_mr, ++key); } memset(&sig_wr, 0, sizeof(sig_wr)); @@ -585,12 +565,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, sig_sge->lkey = pi_ctx->sig_mr->lkey; sig_sge->addr = 0; - sig_sge->length = data_sge->length + prot_sge->length; - if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT || - scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) { - sig_sge->length += (data_sge->length / - iser_task->sc->device->sector_size) * 8; - } + sig_sge->length = scsi_transfer_length(iser_task->sc); iser_dbg("sig_sge: addr: 0x%llx length: %u lkey: 0x%x\n", sig_sge->addr, sig_sge->length, @@ -613,7 +588,6 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct ib_fast_reg_page_list *frpl; struct ib_send_wr fastreg_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; - u8 key; int ret, offset, size, plen; /* if there a single dma entry, dma mr suffices */ @@ -645,14 +619,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, } if (!(desc->reg_indicators & ind)) { - memset(&inv_wr, 0, sizeof(inv_wr)); - inv_wr.wr_id = ISER_FASTREG_LI_WRID; - inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.ex.invalidate_rkey = mr->rkey; + iser_inv_rkey(&inv_wr, mr); wr = &inv_wr; - /* Bump the key */ - key = (u8)(mr->rkey & 0x000000FF); - ib_update_fast_reg_key(mr, ++key); } /* Prepare FASTREG WR */ @@ -770,15 +738,11 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey; regd_buf->reg.va = sig_sge.addr; regd_buf->reg.len = sig_sge.length; - regd_buf->reg.is_mr = 1; } else { - if (desc) { + if (desc) regd_buf->reg.rkey = desc->data_mr->rkey; - regd_buf->reg.is_mr = 1; - } else { + else regd_buf->reg.rkey = device->mr->rkey; - regd_buf->reg.is_mr = 0; - } regd_buf->reg.lkey = data_sge.lkey; regd_buf->reg.va = data_sge.addr; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 67225bb82bb5..695a2704bd43 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -76,7 +76,7 @@ static void iser_event_handler(struct ib_event_handler *handler, static int iser_create_device_ib_res(struct iser_device *device) { struct ib_device_attr *dev_attr = &device->dev_attr; - int ret, i; + int ret, i, max_cqe; ret = ib_query_device(device->ib_device, dev_attr); if (ret) { @@ -104,11 +104,19 @@ static int iser_create_device_ib_res(struct iser_device *device) return -1; } - device->comps_used = min(ISER_MAX_CQ, + device->comps_used = min_t(int, num_online_cpus(), device->ib_device->num_comp_vectors); - iser_info("using %d CQs, device %s supports %d vectors\n", + + device->comps = kcalloc(device->comps_used, sizeof(*device->comps), + GFP_KERNEL); + if (!device->comps) + goto comps_err; + + max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe); + + iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n", device->comps_used, device->ib_device->name, - device->ib_device->num_comp_vectors); + device->ib_device->num_comp_vectors, max_cqe); device->pd = ib_alloc_pd(device->ib_device); if (IS_ERR(device->pd)) @@ -122,7 +130,7 @@ static int iser_create_device_ib_res(struct iser_device *device) iser_cq_callback, iser_cq_event_callback, (void *)comp, - ISER_MAX_CQ_LEN, i); + max_cqe, i); if (IS_ERR(comp->cq)) { comp->cq = NULL; goto cq_err; @@ -162,6 +170,8 @@ cq_err: } ib_dealloc_pd(device->pd); pd_err: + kfree(device->comps); +comps_err: iser_err("failed to allocate an IB resource\n"); return -1; } @@ -187,6 +197,9 @@ static void iser_free_device_ib_res(struct iser_device *device) (void)ib_dereg_mr(device->mr); (void)ib_dealloc_pd(device->pd); + kfree(device->comps); + device->comps = NULL; + device->mr = NULL; device->pd = NULL; } @@ -425,7 +438,10 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) */ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) { + struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, + ib_conn); struct iser_device *device; + struct ib_device_attr *dev_attr; struct ib_qp_init_attr init_attr; int ret = -ENOMEM; int index, min_index = 0; @@ -433,6 +449,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) BUG_ON(ib_conn->device == NULL); device = ib_conn->device; + dev_attr = &device->dev_attr; memset(&init_attr, 0, sizeof init_attr); @@ -460,8 +477,20 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) if (ib_conn->pi_support) { init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1; init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; + iser_conn->max_cmds = + ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS); } else { - init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; + if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) { + init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; + iser_conn->max_cmds = + ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS); + } else { + init_attr.cap.max_send_wr = dev_attr->max_qp_wr; + iser_conn->max_cmds = + ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr); + iser_dbg("device %s supports max_send_wr %d\n", + device->ib_device->name, dev_attr->max_qp_wr); + } } ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); @@ -475,7 +504,11 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) return ret; out_err: + mutex_lock(&ig.connlist_mutex); + ib_conn->comp->active_qps--; + mutex_unlock(&ig.connlist_mutex); iser_err("unable to alloc mem or create resource, err %d\n", ret); + return ret; } @@ -610,9 +643,11 @@ void iser_conn_release(struct iser_conn *iser_conn) mutex_unlock(&ig.connlist_mutex); mutex_lock(&iser_conn->state_mutex); - if (iser_conn->state != ISER_CONN_DOWN) + if (iser_conn->state != ISER_CONN_DOWN) { iser_warn("iser conn %p state %d, expected state down.\n", iser_conn, iser_conn->state); + iser_conn->state = ISER_CONN_DOWN; + } /* * In case we never got to bind stage, we still need to * release IB resources (which is safe to call more than once). @@ -662,8 +697,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn) /* post an indication that all flush errors were consumed */ err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr); - if (err) + if (err) { iser_err("conn %p failed to post beacon", ib_conn); + return 1; + } wait_for_completion(&ib_conn->flush_comp); } @@ -846,20 +883,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve break; case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_ADDR_CHANGE: - iser_disconnected_handler(cma_id); + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + iser_cleanup_handler(cma_id, false); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: /* * we *must* destroy the device as we cannot rely * on iscsid to be around to initiate error handling. - * also implicitly destroy the cma_id. + * also if we are not in state DOWN implicitly destroy + * the cma_id. */ iser_cleanup_handler(cma_id, true); - iser_conn->ib_conn.cma_id = NULL; - ret = 1; - break; - case RDMA_CM_EVENT_TIMEWAIT_EXIT: - iser_cleanup_handler(cma_id, false); + if (iser_conn->state != ISER_CONN_DOWN) { + iser_conn->ib_conn.cma_id = NULL; + ret = 1; + } break; default: iser_err("Unexpected RDMA CM event (%d)\n", event->event); @@ -981,7 +1019,6 @@ int iser_reg_page_vec(struct ib_conn *ib_conn, mem_reg->rkey = mem->fmr->rkey; mem_reg->len = page_vec->length * SIZE_4K; mem_reg->va = io_addr; - mem_reg->is_mr = 1; mem_reg->mem_h = (void *)mem; mem_reg->va += page_vec->offset; @@ -1008,7 +1045,7 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; int ret; - if (!reg->is_mr) + if (!reg->mem_h) return; iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h); @@ -1028,11 +1065,10 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, struct ib_conn *ib_conn = &iser_conn->ib_conn; struct fast_reg_descriptor *desc = reg->mem_h; - if (!reg->is_mr) + if (!desc) return; reg->mem_h = NULL; - reg->is_mr = 0; spin_lock_bh(&ib_conn->lock); list_add_tail(&desc->list, &ib_conn->fastreg.pool); spin_unlock_bh(&ib_conn->lock); @@ -1049,7 +1085,7 @@ int iser_post_recvl(struct iser_conn *iser_conn) sge.length = ISER_RX_LOGIN_SIZE; sge.lkey = ib_conn->device->mr->lkey; - rx_wr.wr_id = (unsigned long)iser_conn->login_resp_buf; + rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf; rx_wr.sg_list = &sge; rx_wr.num_sge = 1; rx_wr.next = NULL; @@ -1073,7 +1109,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { rx_desc = &iser_conn->rx_descs[my_rx_head]; - rx_wr->wr_id = (unsigned long)rx_desc; + rx_wr->wr_id = (uintptr_t)rx_desc; rx_wr->sg_list = &rx_desc->rx_sg; rx_wr->num_sge = 1; rx_wr->next = rx_wr + 1; @@ -1110,7 +1146,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, DMA_TO_DEVICE); send_wr.next = NULL; - send_wr.wr_id = (unsigned long)tx_desc; + send_wr.wr_id = (uintptr_t)tx_desc; send_wr.sg_list = tx_desc->tx_sg; send_wr.num_sge = tx_desc->num_sge; send_wr.opcode = IB_WR_SEND; @@ -1160,6 +1196,7 @@ static void iser_handle_comp_error(struct ib_conn *ib_conn, struct ib_wc *wc) { + void *wr_id = (void *)(uintptr_t)wc->wr_id; struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, ib_conn); @@ -1168,8 +1205,8 @@ iser_handle_comp_error(struct ib_conn *ib_conn, iscsi_conn_failure(iser_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); - if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) { - struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id; + if (is_iser_tx_desc(iser_conn, wr_id)) { + struct iser_tx_desc *desc = wr_id; if (desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, desc); @@ -1193,14 +1230,14 @@ static void iser_handle_wc(struct ib_wc *wc) struct iser_rx_desc *rx_desc; ib_conn = wc->qp->qp_context; - if (wc->status == IB_WC_SUCCESS) { + if (likely(wc->status == IB_WC_SUCCESS)) { if (wc->opcode == IB_WC_RECV) { - rx_desc = (struct iser_rx_desc *)wc->wr_id; + rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id; iser_rcv_completion(rx_desc, wc->byte_len, ib_conn); } else if (wc->opcode == IB_WC_SEND) { - tx_desc = (struct iser_tx_desc *)wc->wr_id; + tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id; iser_snd_completion(tx_desc, ib_conn); } else { iser_err("Unknown wc opcode %d\n", wc->opcode); |