summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorVu Pham <vu@mellanox.com>2013-08-28 23:23:35 +0300
committerNicholas Bellinger <nab@linux-iscsi.org>2013-09-10 16:48:51 -0700
commit59464ef4fbb567bcd3b160ae12c3eb087f4e7568 (patch)
treedc9af410be7fae4c88a919e5190f4728fb3bfedb /drivers/infiniband
parentd40945d8c2fbbff8adb6f35cc3e268df4934eb2a (diff)
iser-target: introduce fast memory registration mode (FRWR)
This model was introduced in 00f7ec36c "RDMA/core: Add memory management extensions support" and works when the IB device supports the IB_DEVICE_MEM_MGT_EXTENSIONS capability. Upon creating the isert device, ib_isert will test whether the HCA supports FRWR. If supported then set the flag and assign function pointers that handle fast registration and deregistration of appropriate resources (fast_reg descriptors). When new connection coming in, ib_isert will check frwr flag and create frwr resouces, if fail to do it will switch back to old model of using global dma key and turn off the frwr support. Registration is done using posting IB_WR_FAST_REG_MR to the QP and invalidations using posting IB_WR_LOCAL_INV. Signed-off-by: Vu Pham <vu@mellanox.com> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c367
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h17
2 files changed, 371 insertions, 13 deletions
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 4c3d66009237..51c3bed6c12b 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -45,6 +45,11 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
static int
isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
struct isert_rdma_wr *wr);
+static void
+isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
+static int
+isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+ struct isert_rdma_wr *wr);
static void
isert_qp_event_callback(struct ib_event *e, void *context)
@@ -85,14 +90,8 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
{
struct isert_device *device = isert_conn->conn_device;
struct ib_qp_init_attr attr;
- struct ib_device_attr devattr;
int ret, index, min_index = 0;
- memset(&devattr, 0, sizeof(struct ib_device_attr));
- ret = isert_query_device(cma_id->device, &devattr);
- if (ret)
- return ret;
-
mutex_lock(&device_list_mutex);
for (index = 0; index < device->cqs_used; index++)
if (device->cq_active_qps[index] <
@@ -113,7 +112,7 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
* FIXME: Use devattr.max_sge - 2 for max_send_sge as
* work-around for RDMA_READ..
*/
- attr.cap.max_send_sge = devattr.max_sge - 2;
+ attr.cap.max_send_sge = device->dev_attr.max_sge - 2;
isert_conn->max_sge = attr.cap.max_send_sge;
attr.cap.max_recv_sge = 1;
@@ -215,18 +214,31 @@ isert_create_device_ib_res(struct isert_device *device)
{
struct ib_device *ib_dev = device->ib_device;
struct isert_cq_desc *cq_desc;
+ struct ib_device_attr *dev_attr;
int ret = 0, i, j;
+ dev_attr = &device->dev_attr;
+ ret = isert_query_device(ib_dev, dev_attr);
+ if (ret)
+ return ret;
+
/* asign function handlers */
- device->reg_rdma_mem = isert_map_rdma;
- device->unreg_rdma_mem = isert_unmap_cmd;
+ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ device->use_frwr = 1;
+ device->reg_rdma_mem = isert_reg_rdma_frwr;
+ device->unreg_rdma_mem = isert_unreg_rdma_frwr;
+ } else {
+ device->use_frwr = 0;
+ device->reg_rdma_mem = isert_map_rdma;
+ device->unreg_rdma_mem = isert_unmap_cmd;
+ }
device->cqs_used = min_t(int, num_online_cpus(),
device->ib_device->num_comp_vectors);
device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used);
- pr_debug("Using %d CQs, device %s supports %d vectors\n",
+ pr_debug("Using %d CQs, device %s supports %d vectors support FRWR %d\n",
device->cqs_used, device->ib_device->name,
- device->ib_device->num_comp_vectors);
+ device->ib_device->num_comp_vectors, device->use_frwr);
device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) *
device->cqs_used, GFP_KERNEL);
if (!device->cq_desc) {
@@ -372,6 +384,85 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id)
return device;
}
+static void
+isert_conn_free_frwr_pool(struct isert_conn *isert_conn)
+{
+ struct fast_reg_descriptor *fr_desc, *tmp;
+ int i = 0;
+
+ if (list_empty(&isert_conn->conn_frwr_pool))
+ return;
+
+ pr_debug("Freeing conn %p frwr pool", isert_conn);
+
+ list_for_each_entry_safe(fr_desc, tmp,
+ &isert_conn->conn_frwr_pool, list) {
+ list_del(&fr_desc->list);
+ ib_free_fast_reg_page_list(fr_desc->data_frpl);
+ ib_dereg_mr(fr_desc->data_mr);
+ kfree(fr_desc);
+ ++i;
+ }
+
+ if (i < isert_conn->conn_frwr_pool_size)
+ pr_warn("Pool still has %d regions registered\n",
+ isert_conn->conn_frwr_pool_size - i);
+}
+
+static int
+isert_conn_create_frwr_pool(struct isert_conn *isert_conn)
+{
+ struct fast_reg_descriptor *fr_desc;
+ struct isert_device *device = isert_conn->conn_device;
+ int i, ret;
+
+ INIT_LIST_HEAD(&isert_conn->conn_frwr_pool);
+ isert_conn->conn_frwr_pool_size = 0;
+ for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) {
+ fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
+ if (!fr_desc) {
+ pr_err("Failed to allocate fast_reg descriptor\n");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ fr_desc->data_frpl =
+ ib_alloc_fast_reg_page_list(device->ib_device,
+ ISCSI_ISER_SG_TABLESIZE);
+ if (IS_ERR(fr_desc->data_frpl)) {
+ pr_err("Failed to allocate fr_pg_list err=%ld\n",
+ PTR_ERR(fr_desc->data_frpl));
+ ret = PTR_ERR(fr_desc->data_frpl);
+ goto err;
+ }
+
+ fr_desc->data_mr = ib_alloc_fast_reg_mr(device->dev_pd,
+ ISCSI_ISER_SG_TABLESIZE);
+ if (IS_ERR(fr_desc->data_mr)) {
+ pr_err("Failed to allocate frmr err=%ld\n",
+ PTR_ERR(fr_desc->data_mr));
+ ret = PTR_ERR(fr_desc->data_mr);
+ ib_free_fast_reg_page_list(fr_desc->data_frpl);
+ goto err;
+ }
+ pr_debug("Create fr_desc %p page_list %p\n",
+ fr_desc, fr_desc->data_frpl->page_list);
+
+ fr_desc->valid = true;
+ list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool);
+ isert_conn->conn_frwr_pool_size++;
+ }
+
+ pr_debug("Creating conn %p frwr pool size=%d",
+ isert_conn, isert_conn->conn_frwr_pool_size);
+
+ return 0;
+
+err:
+ isert_conn_free_frwr_pool(isert_conn);
+ return ret;
+}
+
static int
isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
@@ -398,6 +489,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
kref_init(&isert_conn->conn_kref);
kref_get(&isert_conn->conn_kref);
mutex_init(&isert_conn->conn_mutex);
+ spin_lock_init(&isert_conn->conn_lock);
cma_id->context = isert_conn;
isert_conn->conn_cm_id = cma_id;
@@ -455,6 +547,14 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
isert_conn->conn_pd = device->dev_pd;
isert_conn->conn_mr = device->dev_mr;
+ if (device->use_frwr) {
+ ret = isert_conn_create_frwr_pool(isert_conn);
+ if (ret) {
+ pr_err("Conn: %p failed to create frwr_pool\n", isert_conn);
+ goto out_frwr;
+ }
+ }
+
ret = isert_conn_setup_qp(isert_conn, cma_id);
if (ret)
goto out_conn_dev;
@@ -468,6 +568,9 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
return 0;
out_conn_dev:
+ if (device->use_frwr)
+ isert_conn_free_frwr_pool(isert_conn);
+out_frwr:
isert_device_try_release(device);
out_rsp_dma_map:
ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
@@ -491,6 +594,9 @@ isert_connect_release(struct isert_conn *isert_conn)
pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
+ if (device->use_frwr)
+ isert_conn_free_frwr_pool(isert_conn);
+
if (isert_conn->conn_qp) {
cq_index = ((struct isert_cq_desc *)
isert_conn->conn_qp->recv_cq->cq_context)->cq_index;
@@ -1266,6 +1372,36 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
}
static void
+isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
+{
+ struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+ struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ LIST_HEAD(unmap_list);
+
+ pr_debug("unreg_frwr_cmd: %p\n", isert_cmd);
+
+ if (wr->fr_desc) {
+ pr_debug("unreg_frwr_cmd: %p free fr_desc %p\n",
+ isert_cmd, wr->fr_desc);
+ spin_lock_bh(&isert_conn->conn_lock);
+ list_add_tail(&wr->fr_desc->list, &isert_conn->conn_frwr_pool);
+ spin_unlock_bh(&isert_conn->conn_lock);
+ wr->fr_desc = NULL;
+ }
+
+ if (wr->sge) {
+ pr_debug("unreg_frwr_cmd: %p unmap_sg op\n", isert_cmd);
+ ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge,
+ (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ wr->sge = NULL;
+ }
+
+ wr->ib_sge = NULL;
+ wr->send_wr = NULL;
+}
+
+static void
isert_put_cmd(struct isert_cmd *isert_cmd)
{
struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
@@ -1941,11 +2077,216 @@ unmap_sg:
}
static int
+isert_map_fr_pagelist(struct ib_device *ib_dev,
+ struct scatterlist *sg_start, int sg_nents, u64 *fr_pl)
+{
+ u64 start_addr, end_addr, page, chunk_start = 0;
+ struct scatterlist *tmp_sg;
+ int i = 0, new_chunk, last_ent, n_pages;
+
+ n_pages = 0;
+ new_chunk = 1;
+ last_ent = sg_nents - 1;
+ for_each_sg(sg_start, tmp_sg, sg_nents, i) {
+ start_addr = ib_sg_dma_address(ib_dev, tmp_sg);
+ if (new_chunk)
+ chunk_start = start_addr;
+ end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg);
+
+ pr_debug("SGL[%d] dma_addr: 0x%16llx len: %u\n",
+ i, (unsigned long long)tmp_sg->dma_address,
+ tmp_sg->length);
+
+ if ((end_addr & ~PAGE_MASK) && i < last_ent) {
+ new_chunk = 0;
+ continue;
+ }
+ new_chunk = 1;
+
+ page = chunk_start & PAGE_MASK;
+ do {
+ fr_pl[n_pages++] = page;
+ pr_debug("Mapped page_list[%d] page_addr: 0x%16llx\n",
+ n_pages - 1, page);
+ page += PAGE_SIZE;
+ } while (page < end_addr);
+ }
+
+ return n_pages;
+}
+
+static int
+isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc,
+ struct isert_cmd *isert_cmd, struct isert_conn *isert_conn,
+ struct ib_sge *ib_sge, u32 offset, unsigned int data_len)
+{
+ struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
+ struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct scatterlist *sg_start;
+ u32 sg_off, page_off;
+ struct ib_send_wr fr_wr, inv_wr;
+ struct ib_send_wr *bad_wr, *wr = NULL;
+ u8 key;
+ int ret, sg_nents, pagelist_len;
+
+ sg_off = offset / PAGE_SIZE;
+ sg_start = &cmd->se_cmd.t_data_sg[sg_off];
+ sg_nents = min_t(unsigned int, cmd->se_cmd.t_data_nents - sg_off,
+ ISCSI_ISER_SG_TABLESIZE);
+ page_off = offset % PAGE_SIZE;
+
+ pr_debug("Cmd: %p use fr_desc %p sg_nents %d sg_off %d offset %u\n",
+ isert_cmd, fr_desc, sg_nents, sg_off, offset);
+
+ pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents,
+ &fr_desc->data_frpl->page_list[0]);
+
+ if (!fr_desc->valid) {
+ memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.ex.invalidate_rkey = fr_desc->data_mr->rkey;
+ wr = &inv_wr;
+ /* Bump the key */
+ key = (u8)(fr_desc->data_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(fr_desc->data_mr, ++key);
+ }
+
+ /* Prepare FASTREG WR */
+ memset(&fr_wr, 0, sizeof(fr_wr));
+ fr_wr.opcode = IB_WR_FAST_REG_MR;
+ fr_wr.wr.fast_reg.iova_start =
+ fr_desc->data_frpl->page_list[0] + page_off;
+ fr_wr.wr.fast_reg.page_list = fr_desc->data_frpl;
+ fr_wr.wr.fast_reg.page_list_len = pagelist_len;
+ fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ fr_wr.wr.fast_reg.length = data_len;
+ fr_wr.wr.fast_reg.rkey = fr_desc->data_mr->rkey;
+ fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE;
+
+ if (!wr)
+ wr = &fr_wr;
+ else
+ wr->next = &fr_wr;
+
+ ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
+ if (ret) {
+ pr_err("fast registration failed, ret:%d\n", ret);
+ return ret;
+ }
+ fr_desc->valid = false;
+
+ ib_sge->lkey = fr_desc->data_mr->lkey;
+ ib_sge->addr = fr_desc->data_frpl->page_list[0] + page_off;
+ ib_sge->length = data_len;
+
+ pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n",
+ ib_sge->addr, ib_sge->length, ib_sge->lkey);
+
+ return ret;
+}
+
+static int
+isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+ struct isert_rdma_wr *wr)
+{
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+ struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct ib_send_wr *send_wr;
+ struct ib_sge *ib_sge;
+ struct scatterlist *sg_start;
+ struct fast_reg_descriptor *fr_desc;
+ u32 sg_off = 0, sg_nents;
+ u32 offset = 0, data_len, data_left, rdma_write_max;
+ int ret = 0, count;
+ unsigned long flags;
+
+ if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+ data_left = se_cmd->data_length;
+ iscsit_increment_maxcmdsn(cmd, conn->sess);
+ cmd->stat_sn = conn->stat_sn++;
+ } else {
+ sg_off = cmd->write_data_done / PAGE_SIZE;
+ data_left = se_cmd->data_length - cmd->write_data_done;
+ offset = cmd->write_data_done;
+ isert_cmd->tx_desc.isert_cmd = isert_cmd;
+ }
+
+ sg_start = &cmd->se_cmd.t_data_sg[sg_off];
+ sg_nents = se_cmd->t_data_nents - sg_off;
+
+ count = ib_dma_map_sg(ib_dev, sg_start, sg_nents,
+ (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ if (unlikely(!count)) {
+ pr_err("Cmd: %p unrable to map SGs\n", isert_cmd);
+ return -EINVAL;
+ }
+ wr->sge = sg_start;
+ wr->num_sge = sg_nents;
+ pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
+ isert_cmd, count, sg_start, sg_nents, data_left);
+
+ memset(&wr->s_ib_sge, 0, sizeof(*ib_sge));
+ ib_sge = &wr->s_ib_sge;
+ wr->ib_sge = ib_sge;
+
+ wr->send_wr_num = 1;
+ memset(&wr->s_send_wr, 0, sizeof(*send_wr));
+ wr->send_wr = &wr->s_send_wr;
+
+ wr->isert_cmd = isert_cmd;
+ rdma_write_max = ISCSI_ISER_SG_TABLESIZE * PAGE_SIZE;
+
+ send_wr = &isert_cmd->rdma_wr.s_send_wr;
+ send_wr->sg_list = ib_sge;
+ send_wr->num_sge = 1;
+ send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+ if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+ send_wr->opcode = IB_WR_RDMA_WRITE;
+ send_wr->wr.rdma.remote_addr = isert_cmd->read_va;
+ send_wr->wr.rdma.rkey = isert_cmd->read_stag;
+ send_wr->send_flags = 0;
+ send_wr->next = &isert_cmd->tx_desc.send_wr;
+ } else {
+ send_wr->opcode = IB_WR_RDMA_READ;
+ send_wr->wr.rdma.remote_addr = isert_cmd->write_va;
+ send_wr->wr.rdma.rkey = isert_cmd->write_stag;
+ send_wr->send_flags = IB_SEND_SIGNALED;
+ }
+
+ data_len = min(data_left, rdma_write_max);
+ wr->cur_rdma_length = data_len;
+
+ spin_lock_irqsave(&isert_conn->conn_lock, flags);
+ fr_desc = list_first_entry(&isert_conn->conn_frwr_pool,
+ struct fast_reg_descriptor, list);
+ list_del(&fr_desc->list);
+ spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
+ wr->fr_desc = fr_desc;
+
+ ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn,
+ ib_sge, offset, data_len);
+ if (ret) {
+ list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool);
+ goto unmap_sg;
+ }
+
+ return 0;
+
+unmap_sg:
+ ib_dma_unmap_sg(ib_dev, sg_start, sg_nents,
+ (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ return ret;
+}
+
+static int
isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
{
struct se_cmd *se_cmd = &cmd->se_cmd;
- struct isert_cmd *isert_cmd = container_of(cmd,
- struct isert_cmd, iscsi_cmd);
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
struct isert_device *device = isert_conn->conn_device;
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 76565030e4e7..631f2090f0b8 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -5,6 +5,7 @@
#include <rdma/rdma_cm.h>
#define ISERT_RDMA_LISTEN_BACKLOG 10
+#define ISCSI_ISER_SG_TABLESIZE 256
enum isert_desc_type {
ISCSI_TX_CONTROL,
@@ -45,16 +46,26 @@ struct iser_tx_desc {
struct ib_send_wr send_wr;
} __packed;
+struct fast_reg_descriptor {
+ struct list_head list;
+ struct ib_mr *data_mr;
+ struct ib_fast_reg_page_list *data_frpl;
+ bool valid;
+};
+
struct isert_rdma_wr {
struct list_head wr_list;
struct isert_cmd *isert_cmd;
enum iser_ib_op_code iser_ib_op;
struct ib_sge *ib_sge;
+ struct ib_sge s_ib_sge;
int num_sge;
struct scatterlist *sge;
int send_wr_num;
struct ib_send_wr *send_wr;
+ struct ib_send_wr s_send_wr;
u32 cur_rdma_length;
+ struct fast_reg_descriptor *fr_desc;
};
struct isert_cmd {
@@ -106,6 +117,10 @@ struct isert_conn {
wait_queue_head_t conn_wait;
wait_queue_head_t conn_wait_comp_err;
struct kref conn_kref;
+ struct list_head conn_frwr_pool;
+ int conn_frwr_pool_size;
+ /* lock to protect frwr_pool */
+ spinlock_t conn_lock;
};
#define ISERT_MAX_CQ 64
@@ -118,6 +133,7 @@ struct isert_cq_desc {
};
struct isert_device {
+ int use_frwr;
int cqs_used;
int refcount;
int cq_active_qps[ISERT_MAX_CQ];
@@ -128,6 +144,7 @@ struct isert_device {
struct ib_cq *dev_tx_cq[ISERT_MAX_CQ];
struct isert_cq_desc *cq_desc;
struct list_head dev_node;
+ struct ib_device_attr dev_attr;
int (*reg_rdma_mem)(struct iscsi_conn *conn,
struct iscsi_cmd *cmd,
struct isert_rdma_wr *wr);