author    | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-07 21:11:05 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-07 21:11:05 -0700
commit    | 572c01ba19ef150e98aea0b45ca17d43356521b5 (patch)
tree      | 289381d051dfc34a86be988700ee11cb9ad0cd5b /drivers/scsi/lpfc/lpfc_nvmet.c
parent    | cef5d0f952a03d42051141742632078d488b0c6b (diff)
parent    | 2441500a41a9b17ff657626eb81972f62bc8cc5a (diff)
Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI updates from James Bottomley:
"This is mostly updates of the usual suspects: lpfc, qla2xxx, hisi_sas,
megaraid_sas, zfcp and a host of minor updates.
The major driver change here is the elimination of the block based
cciss driver in favour of the SCSI based hpsa driver (which now drives
all the legacy cases cciss used to be required for). Plus a reset
handler clean up and the redo of the SAS SMP handler to use bsg lib"
* tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi: (279 commits)
scsi: scsi-mq: Always unprepare before requeuing a request
scsi: Show .retries and .jiffies_at_alloc in debugfs
scsi: Improve requeuing behavior
scsi: Call scsi_initialize_rq() for filesystem requests
scsi: qla2xxx: Reset the logo flag, after target re-login.
scsi: qla2xxx: Fix slow mem alloc behind lock
scsi: qla2xxx: Clear fc4f_nvme flag
scsi: qla2xxx: add missing includes for qla_isr
scsi: qla2xxx: Fix an integer overflow in sysfs code
scsi: aacraid: report -ENOMEM to upper layer from aac_convert_sgraw2()
scsi: aacraid: get rid of one level of indentation
scsi: aacraid: fix indentation errors
scsi: storvsc: fix memory leak on ring buffer busy
scsi: scsi_transport_sas: switch to bsg-lib for SMP passthrough
scsi: smartpqi: remove the smp_handler stub
scsi: hpsa: remove the smp_handler stub
scsi: bsg-lib: pass the release callback through bsg_setup_queue
scsi: Rework handling of scsi_device.vpd_pg8[03]
scsi: Rework the code for caching Vital Product Data (VPD)
scsi: rcu: Introduce rcu_swap_protected()
...
Diffstat (limited to 'drivers/scsi/lpfc/lpfc_nvmet.c')
-rw-r--r-- | drivers/scsi/lpfc/lpfc_nvmet.c | 279
1 file changed, 211 insertions, 68 deletions
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index bbbd0f84160d..0b7c1a49e203 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -170,12 +170,14 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
         struct lpfc_nvmet_tgtport *tgtp;
         struct fc_frame_header *fc_hdr;
         struct rqb_dmabuf *nvmebuf;
+        struct lpfc_nvmet_ctx_info *infop;
         uint32_t *payload;
         uint32_t size, oxid, sid, rc;
+        int cpu;
         unsigned long iflag;
 
         if (ctxp->txrdy) {
-                pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
+                dma_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
                               ctxp->txrdy_phys);
                 ctxp->txrdy = NULL;
                 ctxp->txrdy_phys = 0;
@@ -267,11 +269,16 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
         }
         spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
 
-        spin_lock_irqsave(&phba->sli4_hba.nvmet_ctx_put_lock, iflag);
-        list_add_tail(&ctx_buf->list,
-                      &phba->sli4_hba.lpfc_nvmet_ctx_put_list);
-        phba->sli4_hba.nvmet_ctx_put_cnt++;
-        spin_unlock_irqrestore(&phba->sli4_hba.nvmet_ctx_put_lock, iflag);
+        /*
+         * Use the CPU context list, from the MRQ the IO was received on
+         * (ctxp->idx), to save context structure.
+         */
+        cpu = smp_processor_id();
+        infop = lpfc_get_ctx_list(phba, cpu, ctxp->idx);
+        spin_lock_irqsave(&infop->nvmet_ctx_list_lock, iflag);
+        list_add_tail(&ctx_buf->list, &infop->nvmet_ctx_list);
+        infop->nvmet_ctx_list_cnt++;
+        spin_unlock_irqrestore(&infop->nvmet_ctx_list_lock, iflag);
 #endif
 }
 
@@ -552,7 +559,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
                 /* lpfc_nvmet_xmt_fcp_release() will recycle the context */
         } else {
                 ctxp->entry_cnt++;
-                start_clean = offsetof(struct lpfc_iocbq, wqe);
+                start_clean = offsetof(struct lpfc_iocbq, iocb_flag);
                 memset(((char *)cmdwqe) + start_clean, 0,
                        (sizeof(struct lpfc_iocbq) - start_clean));
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -879,51 +886,54 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = {
 };
 
 static void
-lpfc_nvmet_cleanup_io_context(struct lpfc_hba *phba)
+__lpfc_nvmet_clean_io_for_cpu(struct lpfc_hba *phba,
+                struct lpfc_nvmet_ctx_info *infop)
 {
         struct lpfc_nvmet_ctxbuf *ctx_buf, *next_ctx_buf;
         unsigned long flags;
 
-        spin_lock_irqsave(&phba->sli4_hba.nvmet_ctx_get_lock, flags);
-        spin_lock(&phba->sli4_hba.nvmet_ctx_put_lock);
+        spin_lock_irqsave(&infop->nvmet_ctx_list_lock, flags);
         list_for_each_entry_safe(ctx_buf, next_ctx_buf,
-                        &phba->sli4_hba.lpfc_nvmet_ctx_get_list, list) {
+                                &infop->nvmet_ctx_list, list) {
                 spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
                 list_del_init(&ctx_buf->list);
                 spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
-                __lpfc_clear_active_sglq(phba,
-                                         ctx_buf->sglq->sli4_lxritag);
+
+                __lpfc_clear_active_sglq(phba, ctx_buf->sglq->sli4_lxritag);
                 ctx_buf->sglq->state = SGL_FREED;
                 ctx_buf->sglq->ndlp = NULL;
 
                 spin_lock(&phba->sli4_hba.sgl_list_lock);
                 list_add_tail(&ctx_buf->sglq->list,
-                                &phba->sli4_hba.lpfc_nvmet_sgl_list);
+                              &phba->sli4_hba.lpfc_nvmet_sgl_list);
                 spin_unlock(&phba->sli4_hba.sgl_list_lock);
 
                 lpfc_sli_release_iocbq(phba, ctx_buf->iocbq);
                 kfree(ctx_buf->context);
         }
-        list_for_each_entry_safe(ctx_buf, next_ctx_buf,
-                        &phba->sli4_hba.lpfc_nvmet_ctx_put_list, list) {
-                spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
-                list_del_init(&ctx_buf->list);
-                spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
-                __lpfc_clear_active_sglq(phba,
-                                         ctx_buf->sglq->sli4_lxritag);
-                ctx_buf->sglq->state = SGL_FREED;
-                ctx_buf->sglq->ndlp = NULL;
+        spin_unlock_irqrestore(&infop->nvmet_ctx_list_lock, flags);
+}
 
-                spin_lock(&phba->sli4_hba.sgl_list_lock);
-                list_add_tail(&ctx_buf->sglq->list,
-                                &phba->sli4_hba.lpfc_nvmet_sgl_list);
-                spin_unlock(&phba->sli4_hba.sgl_list_lock);
+static void
+lpfc_nvmet_cleanup_io_context(struct lpfc_hba *phba)
+{
+        struct lpfc_nvmet_ctx_info *infop;
+        int i, j;
 
-                lpfc_sli_release_iocbq(phba, ctx_buf->iocbq);
-                kfree(ctx_buf->context);
+        /* The first context list, MRQ 0 CPU 0 */
+        infop = phba->sli4_hba.nvmet_ctx_info;
+        if (!infop)
+                return;
+
+        /* Cycle the the entire CPU context list for every MRQ */
+        for (i = 0; i < phba->cfg_nvmet_mrq; i++) {
+                for (j = 0; j < phba->sli4_hba.num_present_cpu; j++) {
+                        __lpfc_nvmet_clean_io_for_cpu(phba, infop);
+                        infop++; /* next */
+                }
         }
-        spin_unlock(&phba->sli4_hba.nvmet_ctx_put_lock);
-        spin_unlock_irqrestore(&phba->sli4_hba.nvmet_ctx_get_lock, flags);
+        kfree(phba->sli4_hba.nvmet_ctx_info);
+        phba->sli4_hba.nvmet_ctx_info = NULL;
 }
 
 static int
@@ -932,15 +942,71 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
         struct lpfc_nvmet_ctxbuf *ctx_buf;
         struct lpfc_iocbq *nvmewqe;
         union lpfc_wqe128 *wqe;
-        int i;
+        struct lpfc_nvmet_ctx_info *last_infop;
+        struct lpfc_nvmet_ctx_info *infop;
+        int i, j, idx;
 
         lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
                         "6403 Allocate NVMET resources for %d XRIs\n",
                         phba->sli4_hba.nvmet_xri_cnt);
 
+        phba->sli4_hba.nvmet_ctx_info = kcalloc(
+                phba->sli4_hba.num_present_cpu * phba->cfg_nvmet_mrq,
+                sizeof(struct lpfc_nvmet_ctx_info), GFP_KERNEL);
+        if (!phba->sli4_hba.nvmet_ctx_info) {
+                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                "6419 Failed allocate memory for "
+                                "nvmet context lists\n");
+                return -ENOMEM;
+        }
+
+        /*
+         * Assuming X CPUs in the system, and Y MRQs, allocate some
+         * lpfc_nvmet_ctx_info structures as follows:
+         *
+         * cpu0/mrq0 cpu1/mrq0 ... cpuX/mrq0
+         * cpu0/mrq1 cpu1/mrq1 ... cpuX/mrq1
+         * ...
+         * cpuX/mrqY cpuX/mrqY ... cpuX/mrqY
+         *
+         * Each line represents a MRQ "silo" containing an entry for
+         * every CPU.
+         *
+         * MRQ X is initially assumed to be associated with CPU X, thus
+         * contexts are initially distributed across all MRQs using
+         * the MRQ index (N) as follows cpuN/mrqN. When contexts are
+         * freed, the are freed to the MRQ silo based on the CPU number
+         * of the IO completion. Thus a context that was allocated for MRQ A
+         * whose IO completed on CPU B will be freed to cpuB/mrqA.
+         */
+        infop = phba->sli4_hba.nvmet_ctx_info;
+        for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+                for (j = 0; j < phba->cfg_nvmet_mrq; j++) {
+                        INIT_LIST_HEAD(&infop->nvmet_ctx_list);
+                        spin_lock_init(&infop->nvmet_ctx_list_lock);
+                        infop->nvmet_ctx_list_cnt = 0;
+                        infop++;
+                }
+        }
+
+        /*
+         * Setup the next CPU context info ptr for each MRQ.
+         * MRQ 0 will cycle thru CPUs 0 - X separately from
+         * MRQ 1 cycling thru CPUs 0 - X, and so on.
+         */
+        for (j = 0; j < phba->cfg_nvmet_mrq; j++) {
+                last_infop = lpfc_get_ctx_list(phba, 0, j);
+                for (i = phba->sli4_hba.num_present_cpu - 1; i >= 0; i--) {
+                        infop = lpfc_get_ctx_list(phba, i, j);
+                        infop->nvmet_ctx_next_cpu = last_infop;
+                        last_infop = infop;
+                }
+        }
+
         /* For all nvmet xris, allocate resources needed to process a
          * received command on a per xri basis.
          */
+        idx = 0;
         for (i = 0; i < phba->sli4_hba.nvmet_xri_cnt; i++) {
                 ctx_buf = kzalloc(sizeof(*ctx_buf), GFP_KERNEL);
                 if (!ctx_buf) {
@@ -977,7 +1043,6 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
                 /* Word 7 */
                 bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI);
                 bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3);
-                bf_set(wqe_pu, &wqe->generic.wqe_com, 1);
                 /* Word 10 */
                 bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1);
                 bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0);
@@ -995,12 +1060,35 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
                                 "6407 Ran out of NVMET XRIs\n");
                         return -ENOMEM;
                 }
-                spin_lock(&phba->sli4_hba.nvmet_ctx_get_lock);
-                list_add_tail(&ctx_buf->list,
-                              &phba->sli4_hba.lpfc_nvmet_ctx_get_list);
-                spin_unlock(&phba->sli4_hba.nvmet_ctx_get_lock);
+
+                /*
+                 * Add ctx to MRQidx context list. Our initial assumption
+                 * is MRQidx will be associated with CPUidx. This association
+                 * can change on the fly.
+                 */
+                infop = lpfc_get_ctx_list(phba, idx, idx);
+                spin_lock(&infop->nvmet_ctx_list_lock);
+                list_add_tail(&ctx_buf->list, &infop->nvmet_ctx_list);
+                infop->nvmet_ctx_list_cnt++;
+                spin_unlock(&infop->nvmet_ctx_list_lock);
+
+                /* Spread ctx structures evenly across all MRQs */
+                idx++;
+                if (idx >= phba->cfg_nvmet_mrq)
+                        idx = 0;
+        }
+
+        infop = phba->sli4_hba.nvmet_ctx_info;
+        for (j = 0; j < phba->cfg_nvmet_mrq; j++) {
+                for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+                        lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
+                                        "6408 TOTAL NVMET ctx for CPU %d "
+                                        "MRQ %d: cnt %d nextcpu %p\n",
+                                        i, j, infop->nvmet_ctx_list_cnt,
+                                        infop->nvmet_ctx_next_cpu);
+                        infop++;
+                }
         }
-        phba->sli4_hba.nvmet_ctx_get_cnt = phba->sli4_hba.nvmet_xri_cnt;
         return 0;
 }
 
@@ -1365,10 +1453,65 @@ dropit:
 #endif
 }
 
+static struct lpfc_nvmet_ctxbuf *
+lpfc_nvmet_replenish_context(struct lpfc_hba *phba,
+                             struct lpfc_nvmet_ctx_info *current_infop)
+{
+        struct lpfc_nvmet_ctxbuf *ctx_buf = NULL;
+        struct lpfc_nvmet_ctx_info *get_infop;
+        int i;
+
+        /*
+         * The current_infop for the MRQ a NVME command IU was received
+         * on is empty. Our goal is to replenish this MRQs context
+         * list from a another CPUs.
+         *
+         * First we need to pick a context list to start looking on.
+         * nvmet_ctx_start_cpu has available context the last time
+         * we needed to replenish this CPU where nvmet_ctx_next_cpu
+         * is just the next sequential CPU for this MRQ.
+         */
+        if (current_infop->nvmet_ctx_start_cpu)
+                get_infop = current_infop->nvmet_ctx_start_cpu;
+        else
+                get_infop = current_infop->nvmet_ctx_next_cpu;
+
+        for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+                if (get_infop == current_infop) {
+                        get_infop = get_infop->nvmet_ctx_next_cpu;
+                        continue;
+                }
+                spin_lock(&get_infop->nvmet_ctx_list_lock);
+
+                /* Just take the entire context list, if there are any */
+                if (get_infop->nvmet_ctx_list_cnt) {
+                        list_splice_init(&get_infop->nvmet_ctx_list,
+                                         &current_infop->nvmet_ctx_list);
+                        current_infop->nvmet_ctx_list_cnt =
+                                get_infop->nvmet_ctx_list_cnt - 1;
+                        get_infop->nvmet_ctx_list_cnt = 0;
+                        spin_unlock(&get_infop->nvmet_ctx_list_lock);
+
+                        current_infop->nvmet_ctx_start_cpu = get_infop;
+                        list_remove_head(&current_infop->nvmet_ctx_list,
+                                         ctx_buf, struct lpfc_nvmet_ctxbuf,
+                                         list);
+                        return ctx_buf;
+                }
+
+                /* Otherwise, move on to the next CPU for this MRQ */
+                spin_unlock(&get_infop->nvmet_ctx_list_lock);
+                get_infop = get_infop->nvmet_ctx_next_cpu;
+        }
+
+        /* Nothing found, all contexts for the MRQ are in-flight */
+        return NULL;
+}
+
 /**
  * lpfc_nvmet_unsol_fcp_buffer - Process an unsolicited event data buffer
  * @phba: pointer to lpfc hba data structure.
- * @pring: pointer to a SLI ring.
+ * @idx: relative index of MRQ vector
  * @nvmebuf: pointer to lpfc nvme command HBQ data structure.
  *
  * This routine is used for processing the WQE associated with a unsolicited
@@ -1380,22 +1523,26 @@ dropit:
 **/
 static void
 lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
-                            struct lpfc_sli_ring *pring,
+                            uint32_t idx,
                             struct rqb_dmabuf *nvmebuf,
                             uint64_t isr_timestamp)
 {
-#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
         struct lpfc_nvmet_rcv_ctx *ctxp;
         struct lpfc_nvmet_tgtport *tgtp;
         struct fc_frame_header *fc_hdr;
         struct lpfc_nvmet_ctxbuf *ctx_buf;
+        struct lpfc_nvmet_ctx_info *current_infop;
         uint32_t *payload;
         uint32_t size, oxid, sid, rc, qno;
         unsigned long iflag;
+        int current_cpu;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
         uint32_t id;
 #endif
 
+        if (!IS_ENABLED(CONFIG_NVME_TARGET_FC))
+                return;
+
         ctx_buf = NULL;
         if (!nvmebuf || !phba->targetport) {
                 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
@@ -1407,31 +1554,24 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
                 goto dropit;
         }
 
-        spin_lock_irqsave(&phba->sli4_hba.nvmet_ctx_get_lock, iflag);
-        if (phba->sli4_hba.nvmet_ctx_get_cnt) {
-                list_remove_head(&phba->sli4_hba.lpfc_nvmet_ctx_get_list,
+        /*
+         * Get a pointer to the context list for this MRQ based on
+         * the CPU this MRQ IRQ is associated with. If the CPU association
+         * changes from our initial assumption, the context list could
+         * be empty, thus it would need to be replenished with the
+         * context list from another CPU for this MRQ.
+         */
+        current_cpu = smp_processor_id();
+        current_infop = lpfc_get_ctx_list(phba, current_cpu, idx);
+        spin_lock_irqsave(&current_infop->nvmet_ctx_list_lock, iflag);
+        if (current_infop->nvmet_ctx_list_cnt) {
+                list_remove_head(&current_infop->nvmet_ctx_list,
                                  ctx_buf, struct lpfc_nvmet_ctxbuf, list);
-                phba->sli4_hba.nvmet_ctx_get_cnt--;
+                current_infop->nvmet_ctx_list_cnt--;
         } else {
-                spin_lock(&phba->sli4_hba.nvmet_ctx_put_lock);
-                if (phba->sli4_hba.nvmet_ctx_put_cnt) {
-                        list_splice(&phba->sli4_hba.lpfc_nvmet_ctx_put_list,
-                                    &phba->sli4_hba.lpfc_nvmet_ctx_get_list);
-                        INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_ctx_put_list);
-                        phba->sli4_hba.nvmet_ctx_get_cnt =
-                                phba->sli4_hba.nvmet_ctx_put_cnt;
-                        phba->sli4_hba.nvmet_ctx_put_cnt = 0;
-                        spin_unlock(&phba->sli4_hba.nvmet_ctx_put_lock);
-
-                        list_remove_head(
-                                &phba->sli4_hba.lpfc_nvmet_ctx_get_list,
-                                ctx_buf, struct lpfc_nvmet_ctxbuf, list);
-                        phba->sli4_hba.nvmet_ctx_get_cnt--;
-                } else {
-                        spin_unlock(&phba->sli4_hba.nvmet_ctx_put_lock);
-                }
+                ctx_buf = lpfc_nvmet_replenish_context(phba, current_infop);
         }
-        spin_unlock_irqrestore(&phba->sli4_hba.nvmet_ctx_get_lock, iflag);
+        spin_unlock_irqrestore(&current_infop->nvmet_ctx_list_lock, iflag);
 
         fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
         oxid = be16_to_cpu(fc_hdr->fh_ox_id);
@@ -1483,6 +1623,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
         ctxp->size = size;
         ctxp->oxid = oxid;
         ctxp->sid = sid;
+        ctxp->idx = idx;
         ctxp->state = LPFC_NVMET_STE_RCV;
         ctxp->entry_cnt = 1;
         ctxp->flag = 0;
@@ -1556,7 +1697,6 @@ dropit:
 
         if (nvmebuf)
                 lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
-#endif
 }
 
 /**
@@ -1591,7 +1731,7 @@ lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 /**
  * lpfc_nvmet_unsol_fcp_event - Process an unsolicited event from an nvme nport
  * @phba: pointer to lpfc hba data structure.
- * @pring: pointer to a SLI ring.
+ * @idx: relative index of MRQ vector
  * @nvmebuf: pointer to received nvme data structure.
  *
  * This routine is used to process an unsolicited event received from a SLI
@@ -1602,7 +1742,7 @@ lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 **/
 void
 lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba,
-                           struct lpfc_sli_ring *pring,
+                           uint32_t idx,
                            struct rqb_dmabuf *nvmebuf,
                            uint64_t isr_timestamp)
 {
@@ -1610,7 +1750,7 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba,
                 lpfc_rq_buf_free(phba, &nvmebuf->hbuf);
                 return;
         }
-        lpfc_nvmet_unsol_fcp_buffer(phba, pring, nvmebuf,
+        lpfc_nvmet_unsol_fcp_buffer(phba, idx, nvmebuf,
                                     isr_timestamp);
 }
 
@@ -1863,6 +2003,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
                        nvmewqe->sli4_xritag);
 
                 /* Word 7 */
+                bf_set(wqe_pu, &wqe->fcp_tsend.wqe_com, 1);
                 bf_set(wqe_cmnd, &wqe->fcp_tsend.wqe_com, CMD_FCP_TSEND64_WQE);
 
                 /* Word 8 */
@@ -1939,7 +2080,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
 
         case NVMET_FCOP_WRITEDATA:
                 /* Words 0 - 2 : The first sg segment */
-                txrdy = pci_pool_alloc(phba->txrdy_payload_pool,
+                txrdy = dma_pool_alloc(phba->txrdy_payload_pool,
                                        GFP_KERNEL, &physaddr);
                 if (!txrdy) {
                         lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
@@ -1971,6 +2112,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
                        nvmewqe->sli4_xritag);
 
                 /* Word 7 */
+                bf_set(wqe_pu, &wqe->fcp_treceive.wqe_com, 1);
                 bf_set(wqe_ar, &wqe->fcp_treceive.wqe_com, 0);
                 bf_set(wqe_cmnd, &wqe->fcp_treceive.wqe_com,
                        CMD_FCP_TRECEIVE64_WQE);
@@ -2054,6 +2196,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
                        nvmewqe->sli4_xritag);
 
                 /* Word 7 */
+                bf_set(wqe_pu, &wqe->fcp_trsp.wqe_com, 0);
                 bf_set(wqe_ag, &wqe->fcp_trsp.wqe_com, 1);
                 bf_set(wqe_cmnd, &wqe->fcp_trsp.wqe_com,
                        CMD_FCP_TRSP64_WQE);
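
The core of the lpfc hunks above is a data-structure change: the two global nvmet context lists (get/put) are replaced by a flat table of lpfc_nvmet_ctx_info entries, one per (CPU, MRQ) pair, plus a per-MRQ "next CPU" ring used to borrow contexts when the local silo runs dry. The sketch below is a minimal, self-contained model of that scheme rather than lpfc code: ctx_info, get_ctx_list(), replenish() and the demo main() are illustrative stand-ins, the context list is reduced to a counter, and the spinlocks, the nvmet_ctx_start_cpu shortcut and the immediate pop of one buffer are omitted. It assumes the layout the patch's setup loop implies, index = cpu * nr_mrq + mrq.

#include <stdio.h>
#include <stdlib.h>

struct ctx_info {
        int cnt;                   /* stand-in for nvmet_ctx_list_cnt */
        struct ctx_info *next_cpu; /* stand-in for nvmet_ctx_next_cpu */
};

/* Layout implied by the setup loop above: index = cpu * nr_mrq + mrq. */
static struct ctx_info *get_ctx_list(struct ctx_info *tbl, int nr_mrq,
                                     int cpu, int mrq)
{
        return tbl + (cpu * nr_mrq) + mrq;
}

/*
 * The silo for this (cpu, mrq) is empty: walk the per-MRQ next_cpu ring
 * once and take everything from the first other CPU that has contexts.
 */
static struct ctx_info *replenish(struct ctx_info *cur, int nr_cpu)
{
        struct ctx_info *get = cur->next_cpu;
        int i;

        for (i = 0; i < nr_cpu; i++) {
                if (get != cur && get->cnt) {
                        cur->cnt += get->cnt; /* "splice" the whole list */
                        get->cnt = 0;
                        return get;           /* donor silo */
                }
                get = get->next_cpu;
        }
        return NULL;                          /* every context is in flight */
}

int main(void)
{
        int nr_cpu = 4, nr_mrq = 2, cpu, mrq;
        struct ctx_info *tbl = calloc((size_t)nr_cpu * nr_mrq, sizeof(*tbl));
        struct ctx_info *cur;

        if (!tbl)
                return 1;

        /* Build the per-MRQ next_cpu ring, as the setup hunk does. */
        for (mrq = 0; mrq < nr_mrq; mrq++) {
                struct ctx_info *last = get_ctx_list(tbl, nr_mrq, 0, mrq);

                for (cpu = nr_cpu - 1; cpu >= 0; cpu--) {
                        struct ctx_info *ci = get_ctx_list(tbl, nr_mrq, cpu, mrq);

                        ci->next_cpu = last;
                        last = ci;
                }
        }

        /* Seed cpu1/mrq0 with 8 contexts, then service an IRQ on cpu3/mrq0. */
        get_ctx_list(tbl, nr_mrq, 1, 0)->cnt = 8;
        cur = get_ctx_list(tbl, nr_mrq, 3, 0);
        printf("borrowed from another cpu: %s, local cnt now %d\n",
               replenish(cur, nr_cpu) ? "yes" : "no", cur->cnt);

        free(tbl);
        return 0;
}

In the driver itself the transfer is a list_splice_init() of real list_heads under both silos' nvmet_ctx_list_lock spinlocks, the donor is remembered in nvmet_ctx_start_cpu for the next shortage, and one buffer is popped immediately for the command that triggered the replenish, which is why the hunk sets the new count to get_infop->nvmet_ctx_list_cnt - 1.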