From b2cbae2c248776d81cc265ff7d48405b6a4cc463 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 20 May 2011 11:46:11 -0700 Subject: RDMA: Add netlink infrastructure Add basic RDMA netlink infrastructure that allows for registration of RDMA clients for which data is to be exported and supplies message construction callbacks. Signed-off-by: Nir Muchtar [ Reorganize a few things, add CONFIG_NET dependency. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig index 7c03a70c55a2..8349f9c5064c 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_QIB tristate "QLogic PCIe HCA support" - depends on 64BIT && NET + depends on 64BIT ---help--- This is a low-level driver for QLogic PCIe QLE InfiniBand host channel adapters. This driver does not support the QLogic -- cgit v1.2.3 From c337374bf23b88620bcc66a7a09f141cc640f548 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 20 May 2011 16:25:05 +0000 Subject: RDMA/cxgb4: Use completion objects for event blocking There exists a race condition when using wait_queue_head_t objects that are declared on the stack. This was being done in a few places where we are sending work requests to the FW and awaiting replies, but we don't have an endpoint structure with an embedded c4iw_wr_wait struct. So the code was allocating it locally on the stack. Bad design. The race is: 1) thread on cpuX declares the wait_queue_head_t on the stack, then posts a firmware WR with that wait object ptr as the cookie to be returned in the WR reply. This thread will proceed to block in wait_event_timeout() but before it does: 2) An interrupt runs on cpuY with the WR reply. fw6_msg() handles this and calls c4iw_wake_up(). c4iw_wake_up() sets the condition variable in the c4iw_wr_wait object to TRUE and will call wake_up(), but before it calls wake_up(): 3) The thread on cpuX calls c4iw_wait_for_reply(), which calls wait_event_timeout(). The wait_event_timeout() macro checks the condition variable and returns immediately since it is TRUE. So this thread never blocks/sleeps. The function then returns effectively deallocating the c4iw_wr_wait object that was on the stack. 4) So at this point cpuY has a pointer to the c4iw_wr_wait object that is no longer valid. Further its pointing to a stack frame that might now be in use by some other context/thread. So cpuY continues execution and calls wake_up() on a ptr to a wait object that as been effectively deallocated. This race, when it hits, can cause a crash in wake_up(), which I've seen under heavy stress. It can also corrupt the referenced stack which can cause any number of failures. The fix: Use struct completion, which supports on-stack declarations. Completions use a spinlock around setting the condition to true and the wake up so that steps 2 and 4 above are atomic and step 3 can never happen in-between. Signed-off-by: Steve Wise --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 35d2a5dd9bb4..4f045375c8e2 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include @@ -131,28 +131,21 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev) #define C4IW_WR_TO (10*HZ) -enum { - REPLY_READY = 0, -}; - struct c4iw_wr_wait { - wait_queue_head_t wait; - unsigned long status; + struct completion completion; int ret; }; static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) { wr_waitp->ret = 0; - wr_waitp->status = 0; - init_waitqueue_head(&wr_waitp->wait); + init_completion(&wr_waitp->completion); } static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret) { wr_waitp->ret = ret; - set_bit(REPLY_READY, &wr_waitp->status); - wake_up(&wr_waitp->wait); + complete(&wr_waitp->completion); } static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, @@ -164,8 +157,7 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, int ret; do { - ret = wait_event_timeout(wr_waitp->wait, - test_and_clear_bit(REPLY_READY, &wr_waitp->status), to); + ret = wait_for_completion_timeout(&wr_waitp->completion, to); if (!ret) { printk(KERN_ERR MOD "%s - Device %s not responding - " "tid %u qpid %u\n", func, -- cgit v1.2.3 From 807838686eb9e40d73b8a3f2384881358f51fff0 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 13 May 2011 18:37:18 +0000 Subject: RDMA/cxgb3: Don't post zero-byte read if endpoint is going away tx_ack() wasn't checking the endpoint state and consequently would attempt to post the p2p 0B read on an endpoint/QP that is closing or aborting. This causes a NULL pointer dereference crash. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 26 +++++++++++++++++--------- drivers/infiniband/hw/cxgb3/iwch_provider.h | 2 +- drivers/infiniband/hw/cxgb3/iwch_qp.c | 6 +++--- 3 files changed, 21 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 3216bcad7e82..ad998c0b51e2 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -913,7 +913,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) goto err; if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { - iwch_post_zb_read(ep->com.qp); + iwch_post_zb_read(ep); } goto out; @@ -1077,6 +1077,8 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct iwch_ep *ep = ctx; struct cpl_wr_ack *hdr = cplhdr(skb); unsigned int credits = ntohs(hdr->credits); + unsigned long flags; + int post_zb = 0; PDBG("%s ep %p credits %u\n", __func__, ep, credits); @@ -1086,28 +1088,34 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } + spin_lock_irqsave(&ep->com.lock, flags); BUG_ON(credits != 1); dst_confirm(ep->dst); if (!ep->mpa_skb) { PDBG("%s rdma_init wr_ack ep %p state %u\n", - __func__, ep, state_read(&ep->com)); + __func__, ep, ep->com.state); if (ep->mpa_attr.initiator) { PDBG("%s initiator ep %p state %u\n", - __func__, ep, state_read(&ep->com)); - if (peer2peer) - iwch_post_zb_read(ep->com.qp); + __func__, ep, ep->com.state); + if (peer2peer && ep->com.state == FPDU_MODE) + post_zb = 1; } else { PDBG("%s responder ep %p state %u\n", - __func__, ep, state_read(&ep->com)); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); + __func__, ep, ep->com.state); + if (ep->com.state == MPA_REQ_RCVD) { + ep->com.rpl_done = 1; + wake_up(&ep->com.waitq); + } } } else { PDBG("%s lsm ack ep %p state %u freeing skb\n", - __func__, ep, state_read(&ep->com)); + __func__, ep, ep->com.state); kfree_skb(ep->mpa_skb); ep->mpa_skb = NULL; } + spin_unlock_irqrestore(&ep->com.lock, flags); + if (post_zb) + iwch_post_zb_read(ep); return CPL_RET_BUF_DONE; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index c5406da3f4cd..9a342c9b220d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -332,7 +332,7 @@ int iwch_bind_mw(struct ib_qp *qp, struct ib_mw_bind *mw_bind); int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); -int iwch_post_zb_read(struct iwch_qp *qhp); +int iwch_post_zb_read(struct iwch_ep *ep); int iwch_register_device(struct iwch_dev *dev); void iwch_unregister_device(struct iwch_dev *dev); void stop_read_rep_timer(struct iwch_qp *qhp); diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 1b4cd09f74dc..ecd313f359a4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -738,7 +738,7 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg, } } -int iwch_post_zb_read(struct iwch_qp *qhp) +int iwch_post_zb_read(struct iwch_ep *ep) { union t3_wr *wqe; struct sk_buff *skb; @@ -761,10 +761,10 @@ int iwch_post_zb_read(struct iwch_qp *qhp) wqe->read.local_len = cpu_to_be32(0); wqe->read.local_to = cpu_to_be64(1); wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)| + wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)| V_FW_RIWR_LEN(flit_cnt)); skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); + return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb); } /* -- cgit v1.2.3 From 52f81dbaf1378faf64c3ecea5129cebf826ef126 Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Fri, 1 Apr 2011 06:23:49 +0000 Subject: RDMA/nes: Add a check for strict_strtoul() It should check if strict_strtoul() succeeds before using 'wqm_quanta_value'. Signed-off-by: Liu Yuan [ Convert to kstrtoul() directly while we're here. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 13de1192927c..2d668c69f6d9 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -1138,7 +1138,9 @@ static ssize_t nes_store_wqm_quanta(struct device_driver *ddp, u32 i = 0; struct nes_device *nesdev; - strict_strtoul(buf, 0, &wqm_quanta_value); + if (kstrtoul(buf, 0, &wqm_quanta_value) < 0) + return -EINVAL; + list_for_each_entry(nesdev, &nes_dev_list, list) { if (i == ee_flsh_adapter) { nesdev->nesadapter->wqm_quanta = wqm_quanta_value; -- cgit v1.2.3