diff options
Diffstat (limited to 'drivers/infiniband')
29 files changed, 394 insertions, 332 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index d294bbc42f09..1205e8027829 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -35,6 +35,7 @@ #include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> +#include <net/netevent.h> #include <rdma/ib_addr.h> MODULE_AUTHOR("Sean Hefty"); @@ -326,25 +327,22 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) } EXPORT_SYMBOL(rdma_addr_cancel); -static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pkt, struct net_device *orig_dev) +static int netevent_callback(struct notifier_block *self, unsigned long event, + void *ctx) { - struct arphdr *arp_hdr; + if (event == NETEVENT_NEIGH_UPDATE) { + struct neighbour *neigh = ctx; - arp_hdr = (struct arphdr *) skb->nh.raw; - - if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || - arp_hdr->ar_op == htons(ARPOP_REPLY)) - set_timeout(jiffies); - - kfree_skb(skb); + if (neigh->dev->type == ARPHRD_INFINIBAND && + (neigh->nud_state & NUD_VALID)) { + set_timeout(jiffies); + } + } return 0; } -static struct packet_type addr_arp = { - .type = __constant_htons(ETH_P_ARP), - .func = addr_arp_recv, - .af_packet_priv = (void*) 1, +static struct notifier_block nb = { + .notifier_call = netevent_callback }; static int addr_init(void) @@ -353,13 +351,13 @@ static int addr_init(void) if (!addr_wq) return -ENOMEM; - dev_add_pack(&addr_arp); + register_netevent_notifier(&nb); return 0; } static void addr_cleanup(void) { - dev_remove_pack(&addr_arp); + unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); } diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index e05ca2cdc73f..75313ade2e0d 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -301,7 +301,8 @@ static void ib_cache_event(struct ib_event_handler *handler, event->event == IB_EVENT_PORT_ACTIVE || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_SM_CHANGE) { + event->event == IB_EVENT_SM_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER) { work = kmalloc(sizeof *work, GFP_ATOMIC); if (work) { INIT_WORK(&work->work, ib_cache_task, work); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 3f6705f3083a..0de335b7bfc2 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -701,7 +701,7 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) } } -void ib_destroy_cm_id(struct ib_cm_id *cm_id) +static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; struct cm_work *work; @@ -735,12 +735,22 @@ retest: sizeof cm_id_priv->av.port->cm_dev->ca_guid, NULL, 0); break; + case IB_CM_REQ_RCVD: + if (err == -ENOMEM) { + /* Do not reject to allow future retries. */ + cm_reset_to_idle(cm_id_priv); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + } else { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + } + break; case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* Fall through */ - case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: @@ -775,6 +785,11 @@ retest: kfree(cm_id_priv->private_data); kfree(cm_id_priv); } + +void ib_destroy_cm_id(struct ib_cm_id *cm_id) +{ + cm_destroy_id(cm_id, 0); +} EXPORT_SYMBOL(ib_destroy_cm_id); int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, @@ -960,8 +975,10 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> id.local_id); - if (IS_ERR(cm_id_priv->timewait_info)) + if (IS_ERR(cm_id_priv->timewait_info)) { + ret = PTR_ERR(cm_id_priv->timewait_info); goto out; + } ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av); if (ret) @@ -1163,7 +1180,7 @@ static void cm_process_work(struct cm_id_private *cm_id_priv, } cm_deref_id(cm_id_priv); if (ret) - ib_destroy_cm_id(&cm_id_priv->id); + cm_destroy_id(&cm_id_priv->id, ret); } static void cm_format_mra(struct cm_mra_msg *mra_msg, diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 863f64befc7c..5d625a81193f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -49,7 +49,7 @@ MODULE_DESCRIPTION("Generic RDMA CM Agent"); MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 -#define CMA_MAX_CM_RETRIES 3 +#define CMA_MAX_CM_RETRIES 15 static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); @@ -262,14 +262,14 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv) static int cma_acquire_ib_dev(struct rdma_id_private *id_priv) { struct cma_device *cma_dev; - union ib_gid *gid; + union ib_gid gid; int ret = -ENODEV; - gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr); + ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid), mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) { - ret = ib_find_cached_gid(cma_dev->device, gid, + ret = ib_find_cached_gid(cma_dev->device, &gid, &id_priv->id.port_num, NULL); if (!ret) { cma_attach_to_dev(id_priv, cma_dev); @@ -812,6 +812,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) cma_modify_qp_err(&id_priv->id); status = ib_event->param.rej_rcvd.reason; event = RDMA_CM_EVENT_REJECTED; + private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; break; default: printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d", @@ -1134,8 +1135,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct ib_sa_path_rec path_rec; memset(&path_rec, 0, sizeof path_rec); - path_rec.sgid = *ib_addr_get_sgid(addr); - path_rec.dgid = *ib_addr_get_dgid(addr); + ib_addr_get_sgid(addr, &path_rec.sgid); + ib_addr_get_dgid(addr, &path_rec.dgid); path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); path_rec.numb_path = 1; @@ -1263,7 +1264,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) { struct cma_device *cma_dev; struct ib_port_attr port_attr; - union ib_gid *gid; + union ib_gid gid; u16 pkey; int ret; u8 p; @@ -1284,8 +1285,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) } port_found: - gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr); - ret = ib_get_cached_gid(cma_dev->device, p, 0, gid); + ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); if (ret) goto out; @@ -1293,6 +1293,7 @@ port_found: if (ret) goto out; + ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); id_priv->id.port_num = p; cma_attach_to_dev(id_priv, cma_dev); @@ -1339,6 +1340,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) { struct cma_work *work; struct sockaddr_in *src_in, *dst_in; + union ib_gid gid; int ret; work = kzalloc(sizeof *work, GFP_KERNEL); @@ -1351,8 +1353,8 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) goto err; } - ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, - ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr)); + ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) { src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr; diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 615fe9cc6c56..86a3b2d401db 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -426,7 +426,7 @@ EXPORT_SYMBOL(ib_flush_fmr_pool); struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, u64 *page_list, int list_len, - u64 *io_virtual_address) + u64 io_virtual_address) { struct ib_fmr_pool *pool = pool_handle; struct ib_pool_fmr *fmr; @@ -440,7 +440,7 @@ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, fmr = ib_fmr_cache_lookup(pool, page_list, list_len, - *io_virtual_address); + io_virtual_address); if (fmr) { /* found in cache */ ++fmr->ref_count; @@ -464,7 +464,7 @@ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, spin_unlock_irqrestore(&pool->pool_lock, flags); result = ib_map_phys_fmr(fmr->fmr, page_list, list_len, - *io_virtual_address); + io_virtual_address); if (result) { spin_lock_irqsave(&pool->pool_lock, flags); @@ -481,7 +481,7 @@ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, fmr->ref_count = 1; if (pool->cache_bucket) { - fmr->io_virtual_address = *io_virtual_address; + fmr->io_virtual_address = io_virtual_address; fmr->page_list_len = list_len; memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list)); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 5ed4dab52a6f..1c3cfbbe6a97 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -167,6 +167,15 @@ static int is_vendor_method_in_use( return 0; } +int ib_response_mad(struct ib_mad *mad) +{ + return ((mad->mad_hdr.method & IB_MGMT_METHOD_RESP) || + (mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || + ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_BM) && + (mad->mad_hdr.attr_mod & IB_BM_ATTR_MOD_RESP))); +} +EXPORT_SYMBOL(ib_response_mad); + /* * ib_register_mad_agent - Register to send/receive MADs */ @@ -570,13 +579,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) } EXPORT_SYMBOL(ib_unregister_mad_agent); -static inline int response_mad(struct ib_mad *mad) -{ - /* Trap represses are responses although response bit is reset */ - return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || - (mad->mad_hdr.method & IB_MGMT_METHOD_RESP)); -} - static void dequeue_mad(struct ib_mad_list_head *mad_list) { struct ib_mad_queue *mad_queue; @@ -723,7 +725,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: - if (response_mad(&mad_priv->mad.mad) && + if (ib_response_mad(&mad_priv->mad.mad) && mad_agent_priv->agent.recv_handler) { local->mad_priv = mad_priv; local->recv_mad_agent = mad_agent_priv; @@ -1551,7 +1553,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, unsigned long flags; spin_lock_irqsave(&port_priv->reg_lock, flags); - if (response_mad(mad)) { + if (ib_response_mad(mad)) { u32 hi_tid; struct ib_mad_agent_private *entry; @@ -1799,7 +1801,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } /* Complete corresponding request */ - if (response_mad(mad_recv_wc->recv_buf.mad)) { + if (ib_response_mad(mad_recv_wc->recv_buf.mad)) { spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); if (!mad_send_wr) { diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index e911c99ff843..d6b84226bba7 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -405,7 +405,8 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event event->event == IB_EVENT_PORT_ACTIVE || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_SM_CHANGE) { + event->event == IB_EVENT_SM_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER) { struct ib_sa_device *sa_dev; sa_dev = container_of(handler, typeof(*sa_dev), event_handler); @@ -488,13 +489,13 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) spin_unlock_irqrestore(&tid_lock, flags); } -static int send_mad(struct ib_sa_query *query, int timeout_ms) +static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) { unsigned long flags; int ret, id; retry: - if (!idr_pre_get(&query_idr, GFP_ATOMIC)) + if (!idr_pre_get(&query_idr, gfp_mask)) return -ENOMEM; spin_lock_irqsave(&idr_lock, flags); ret = idr_get_new(&query_idr, query, &id); @@ -630,7 +631,7 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, *sa_query = &query->sa_query; - ret = send_mad(&query->sa_query, timeout_ms); + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; @@ -752,7 +753,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, *sa_query = &query->sa_query; - ret = send_mad(&query->sa_query, timeout_ms); + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; @@ -844,7 +845,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, *sa_query = &query->sa_query; - ret = send_mad(&query->sa_query, timeout_ms); + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index afe70a549c2f..1273f8807e84 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -112,8 +112,10 @@ struct ib_umad_device { struct ib_umad_file { struct ib_umad_port *port; struct list_head recv_list; + struct list_head send_list; struct list_head port_list; spinlock_t recv_lock; + spinlock_t send_lock; wait_queue_head_t recv_wait; struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; int agents_dead; @@ -177,12 +179,21 @@ static int queue_packet(struct ib_umad_file *file, return ret; } +static void dequeue_send(struct ib_umad_file *file, + struct ib_umad_packet *packet) + { + spin_lock_irq(&file->send_lock); + list_del(&packet->list); + spin_unlock_irq(&file->send_lock); + } + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet = send_wc->send_buf->context[0]; + dequeue_send(file, packet); ib_destroy_ah(packet->msg->ah); ib_free_send_mad(packet->msg); @@ -370,6 +381,51 @@ static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf) return 0; } +static int same_destination(struct ib_user_mad_hdr *hdr1, + struct ib_user_mad_hdr *hdr2) +{ + if (!hdr1->grh_present && !hdr2->grh_present) + return (hdr1->lid == hdr2->lid); + + if (hdr1->grh_present && hdr2->grh_present) + return !memcmp(hdr1->gid, hdr2->gid, 16); + + return 0; +} + +static int is_duplicate(struct ib_umad_file *file, + struct ib_umad_packet *packet) +{ + struct ib_umad_packet *sent_packet; + struct ib_mad_hdr *sent_hdr, *hdr; + + hdr = (struct ib_mad_hdr *) packet->mad.data; + list_for_each_entry(sent_packet, &file->send_list, list) { + sent_hdr = (struct ib_mad_hdr *) sent_packet->mad.data; + + if ((hdr->tid != sent_hdr->tid) || + (hdr->mgmt_class != sent_hdr->mgmt_class)) + continue; + + /* + * No need to be overly clever here. If two new operations have + * the same TID, reject the second as a duplicate. This is more + * restrictive than required by the spec. + */ + if (!ib_response_mad((struct ib_mad *) hdr)) { + if (!ib_response_mad((struct ib_mad *) sent_hdr)) + return 1; + continue; + } else if (!ib_response_mad((struct ib_mad *) sent_hdr)) + continue; + + if (same_destination(&packet->mad.hdr, &sent_packet->mad.hdr)) + return 1; + } + + return 0; +} + static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { @@ -379,7 +435,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_ah_attr ah_attr; struct ib_ah *ah; struct ib_rmpp_mad *rmpp_mad; - u8 method; __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; @@ -473,28 +528,36 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, } /* - * If userspace is generating a request that will generate a - * response, we need to make sure the high-order part of the - * transaction ID matches the agent being used to send the - * MAD. + * Set the high-order part of the transaction ID to make MADs from + * different agents unique, and allow routing responses back to the + * original requestor. */ - method = ((struct ib_mad_hdr *) packet->msg->mad)->method; - - if (!(method & IB_MGMT_METHOD_RESP) && - method != IB_MGMT_METHOD_TRAP_REPRESS && - method != IB_MGMT_METHOD_SEND) { + if (!ib_response_mad(packet->msg->mad)) { tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); + rmpp_mad->mad_hdr.tid = *tid; + } + + spin_lock_irq(&file->send_lock); + ret = is_duplicate(file, packet); + if (!ret) + list_add_tail(&packet->list, &file->send_list); + spin_unlock_irq(&file->send_lock); + if (ret) { + ret = -EINVAL; + goto err_msg; } ret = ib_post_send_mad(packet->msg, NULL); if (ret) - goto err_msg; + goto err_send; up_read(&file->port->mutex); return count; +err_send: + dequeue_send(file, packet); err_msg: ib_free_send_mad(packet->msg); err_ah: @@ -657,7 +720,9 @@ static int ib_umad_open(struct inode *inode, struct file *filp) } spin_lock_init(&file->recv_lock); + spin_lock_init(&file->send_lock); INIT_LIST_HEAD(&file->recv_list); + INIT_LIST_HEAD(&file->send_list); init_waitqueue_head(&file->recv_wait); file->port = port; diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index bb9bee56a824..102a59c033ff 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -42,6 +42,7 @@ #include <linux/kref.h> #include <linux/idr.h> #include <linux/mutex.h> +#include <linux/completion.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> @@ -69,6 +70,7 @@ struct ib_uverbs_device { struct kref ref; + struct completion comp; int devnum; struct cdev *dev; struct class_device *class_dev; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index bdf5d5098190..30923eb68ec7 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -42,6 +42,13 @@ #include "uverbs.h" +static struct lock_class_key pd_lock_key; +static struct lock_class_key mr_lock_key; +static struct lock_class_key cq_lock_key; +static struct lock_class_key qp_lock_key; +static struct lock_class_key ah_lock_key; +static struct lock_class_key srq_lock_key; + #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ (udata)->inbuf = (void __user *) (ibuf); \ @@ -76,12 +83,13 @@ */ static void init_uobj(struct ib_uobject *uobj, u64 user_handle, - struct ib_ucontext *context) + struct ib_ucontext *context, struct lock_class_key *key) { uobj->user_handle = user_handle; uobj->context = context; kref_init(&uobj->ref); init_rwsem(&uobj->mutex); + lockdep_set_class(&uobj->mutex, key); uobj->live = 0; } @@ -470,7 +478,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, 0, file->ucontext); + init_uobj(uobj, 0, file->ucontext, &pd_lock_key); down_write(&uobj->mutex); pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, @@ -591,7 +599,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uobject, 0, file->ucontext); + init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key); down_write(&obj->uobject.mutex); /* @@ -770,7 +778,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext); + init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key); down_write(&obj->uobject.mutex); if (cmd.comp_channel >= 0) { @@ -1051,13 +1059,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext); + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); down_write(&obj->uevent.uobject.mutex); + srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; pd = idr_read_pd(cmd.pd_handle, file->ucontext); scq = idr_read_cq(cmd.send_cq_handle, file->ucontext); - rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext); - srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; + rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? + scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext); if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { ret = -EINVAL; @@ -1125,7 +1134,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, put_pd_read(pd); put_cq_read(scq); - put_cq_read(rcq); + if (rcq != scq) + put_cq_read(rcq); if (srq) put_srq_read(srq); @@ -1150,7 +1160,7 @@ err_put: put_pd_read(pd); if (scq) put_cq_read(scq); - if (rcq) + if (rcq && rcq != scq) put_cq_read(rcq); if (srq) put_srq_read(srq); @@ -1751,7 +1761,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, cmd.user_handle, file->ucontext); + init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); @@ -1775,7 +1785,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, ah = ib_create_ah(pd, &attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); - goto err; + goto err_put; } ah->uobject = uobj; @@ -1811,6 +1821,9 @@ err_copy: err_destroy: ib_destroy_ah(ah); +err_put: + put_pd_read(pd); + err: put_uobj_write(uobj); return ret; @@ -1963,7 +1976,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext); + init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key); down_write(&obj->uobject.mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); @@ -1984,7 +1997,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, srq = pd->device->create_srq(pd, &attr, &udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); - goto err; + goto err_put; } srq->device = pd->device; @@ -2029,6 +2042,9 @@ err_copy: err_destroy: ib_destroy_srq(srq); +err_put: + put_pd_read(pd); + err: put_uobj_write(&obj->uobject); return ret; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index e725cccc7cde..4e16314e8e6d 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -122,7 +122,7 @@ static void ib_uverbs_release_dev(struct kref *ref) struct ib_uverbs_device *dev = container_of(ref, struct ib_uverbs_device, ref); - kfree(dev); + complete(&dev->comp); } void ib_uverbs_release_ucq(struct ib_uverbs_file *file, @@ -740,6 +740,7 @@ static void ib_uverbs_add_one(struct ib_device *device) return; kref_init(&uverbs_dev->ref); + init_completion(&uverbs_dev->comp); spin_lock(&map_lock); uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); @@ -793,6 +794,8 @@ err_cdev: err: kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + wait_for_completion(&uverbs_dev->comp); + kfree(uverbs_dev); return; } @@ -812,7 +815,10 @@ static void ib_uverbs_remove_one(struct ib_device *device) spin_unlock(&map_lock); clear_bit(uverbs_dev->devnum, dev_map); + kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + wait_for_completion(&uverbs_dev->comp); + kfree(uverbs_dev); } static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags, diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 823131d58b34..f98518d912b5 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -859,6 +859,38 @@ static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail, __ipath_layer_rcv_lid(dd, hdr); } +static void ipath_rcv_hdrerr(struct ipath_devdata *dd, + u32 eflags, + u32 l, + u32 etail, + u64 *rc) +{ + char emsg[128]; + struct ipath_message_header *hdr; + + get_rhf_errstring(eflags, emsg, sizeof emsg); + hdr = (struct ipath_message_header *)&rc[1]; + ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u " + "tlen=%x opcode=%x egridx=%x: %s\n", + eflags, l, + ipath_hdrget_rcv_type((__le32 *) rc), + ipath_hdrget_length_in_bytes((__le32 *) rc), + be32_to_cpu(hdr->bth[0]) >> 24, + etail, emsg); + + /* Count local link integrity errors. */ + if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) { + u8 n = (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; + + if (++dd->ipath_lli_counter > n) { + dd->ipath_lli_counter = 0; + dd->ipath_lli_errors++; + } + } +} + /* * ipath_kreceive - receive a packet * @dd: the infinipath device @@ -875,7 +907,6 @@ void ipath_kreceive(struct ipath_devdata *dd) struct ipath_message_header *hdr; u32 eflags, i, etype, tlen, pkttot = 0, updegr=0, reloop=0; static u64 totcalls; /* stats, may eventually remove */ - char emsg[128]; if (!dd->ipath_hdrqtailptr) { ipath_dev_err(dd, @@ -938,26 +969,9 @@ reloop: "%x\n", etype); } - if (eflags & ~(INFINIPATH_RHF_H_TIDERR | - INFINIPATH_RHF_H_IHDRERR)) { - get_rhf_errstring(eflags, emsg, sizeof emsg); - ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u " - "tlen=%x opcode=%x egridx=%x: %s\n", - eflags, l, etype, tlen, bthbytes[0], - ipath_hdrget_index((__le32 *) rc), emsg); - /* Count local link integrity errors. */ - if (eflags & (INFINIPATH_RHF_H_ICRCERR | - INFINIPATH_RHF_H_VCRCERR)) { - u8 n = (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; - - if (++dd->ipath_lli_counter > n) { - dd->ipath_lli_counter = 0; - dd->ipath_lli_errors++; - } - } - } else if (etype == RCVHQ_RCV_TYPE_NON_KD) { + if (unlikely(eflags)) + ipath_rcv_hdrerr(dd, eflags, l, etail, rc); + else if (etype == RCVHQ_RCV_TYPE_NON_KD) { int ret = __ipath_verbs_rcv(dd, rc + 1, ebuf, tlen); if (ret == -ENODEV) @@ -981,25 +995,7 @@ reloop: else if (etype == RCVHQ_RCV_TYPE_EXPECTED) ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", be32_to_cpu(hdr->bth[0]) & 0xff); - else if (eflags & (INFINIPATH_RHF_H_TIDERR | - INFINIPATH_RHF_H_IHDRERR)) { - /* - * This is a type 3 packet, only the LRH is in the - * rcvhdrq, the rest of the header is in the eager - * buffer. - */ - u8 opcode; - if (ebuf) { - bthbytes = (u8 *) ebuf; - opcode = *bthbytes; - } - else - opcode = 0; - get_rhf_errstring(eflags, emsg, sizeof emsg); - ipath_dbg("Err %x (%s), opcode %x, egrbuf %x, " - "len %x\n", eflags, emsg, opcode, etail, - tlen); - } else { + else { /* * error packet, type of error unknown. * Probably type 3, but we don't know, so don't diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index 46773c673a1a..a5ca279370aa 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -197,6 +197,21 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, size_t off; int ret; + /* + * We use RKEY == zero for physical addresses + * (see ipath_get_dma_mr). + */ + if (rkey == 0) { + sge->mr = NULL; + sge->vaddr = phys_to_virt(vaddr); + sge->length = len; + sge->sge_length = len; + ss->sg_list = NULL; + ss->num_sge = 1; + ret = 1; + goto bail; + } + mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))]; if (unlikely(mr == NULL || mr->lkey != rkey)) { ret = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 56ac336dd1ec..d70a9b6b5239 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -191,10 +191,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) { struct ipath_sge *sge = &ss->sge; - while (length > sge->sge_length) { - length -= sge->sge_length; - ss->sge = *ss->sg_list++; - } while (length) { u32 len = sge->length; @@ -627,6 +623,7 @@ static int ipath_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID; + props->page_size_cap = PAGE_SIZE; props->vendor_id = ipath_layer_get_vendorid(dev->dd); props->vendor_part_id = ipath_layer_get_deviceid(dev->dd); props->hw_ver = ipath_layer_get_pcirev(dev->dd); diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index 9ba3211cef7c..f930e55b58fc 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -41,9 +41,11 @@ /* Trivial bitmap-based allocator */ u32 mthca_alloc(struct mthca_alloc *alloc) { + unsigned long flags; u32 obj; - spin_lock(&alloc->lock); + spin_lock_irqsave(&alloc->lock, flags); + obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last); if (obj >= alloc->max) { alloc->top = (alloc->top + alloc->max) & alloc->mask; @@ -56,19 +58,24 @@ u32 mthca_alloc(struct mthca_alloc *alloc) } else obj = -1; - spin_unlock(&alloc->lock); + spin_unlock_irqrestore(&alloc->lock, flags); return obj; } void mthca_free(struct mthca_alloc *alloc, u32 obj) { + unsigned long flags; + obj &= alloc->max - 1; - spin_lock(&alloc->lock); + + spin_lock_irqsave(&alloc->lock, flags); + clear_bit(obj, alloc->table); alloc->last = min(alloc->last, obj); alloc->top = (alloc->top + alloc->max) & alloc->mask; - spin_unlock(&alloc->lock); + + spin_unlock_irqrestore(&alloc->lock, flags); } int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, @@ -108,14 +115,15 @@ void mthca_alloc_cleanup(struct mthca_alloc *alloc) * serialize access to the array. */ +#define MTHCA_ARRAY_MASK (PAGE_SIZE / sizeof (void *) - 1) + void *mthca_array_get(struct mthca_array *array, int index) { int p = (index * sizeof (void *)) >> PAGE_SHIFT; - if (array->page_list[p].page) { - int i = index & (PAGE_SIZE / sizeof (void *) - 1); - return array->page_list[p].page[i]; - } else + if (array->page_list[p].page) + return array->page_list[p].page[index & MTHCA_ARRAY_MASK]; + else return NULL; } @@ -130,8 +138,7 @@ int mthca_array_set(struct mthca_array *array, int index, void *value) if (!array->page_list[p].page) return -ENOMEM; - array->page_list[p].page[index & (PAGE_SIZE / sizeof (void *) - 1)] = - value; + array->page_list[p].page[index & MTHCA_ARRAY_MASK] = value; ++array->page_list[p].used; return 0; @@ -144,7 +151,8 @@ void mthca_array_clear(struct mthca_array *array, int index) if (--array->page_list[p].used == 0) { free_page((unsigned long) array->page_list[p].page); array->page_list[p].page = NULL; - } + } else + array->page_list[p].page[index & MTHCA_ARRAY_MASK] = NULL; if (array->page_list[p].used < 0) pr_debug("Array %p index %d page %d with ref count %d < 0\n", diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index b12aa03be251..e215041b2db9 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -303,9 +303,10 @@ int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr) memset(attr, 0, sizeof *attr); attr->dlid = be16_to_cpu(ah->av->dlid); attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28; - attr->static_rate = ah->av->msg_sr & 0x7; - attr->src_path_bits = ah->av->g_slid & 0x7F; attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24; + attr->static_rate = mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7, + attr->port_num); + attr->src_path_bits = ah->av->g_slid & 0x7F; attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0; if (attr->ah_flags) { diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index d0f7731802c9..deabc14b4ea4 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -778,11 +778,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) ((dev->fw_ver & 0xffff0000ull) >> 16) | ((dev->fw_ver & 0x0000ffffull) << 16); + MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); + dev->cmd.max_cmds = 1 << lg; + mthca_dbg(dev, "FW version %012llx, max commands %d\n", (unsigned long long) dev->fw_ver, dev->cmd.max_cmds); - MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); - dev->cmd.max_cmds = 1 << lg; MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET); MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET); diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 557cde3a4563..7b82c1907f04 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -967,12 +967,12 @@ static struct { } mthca_hca_table[] = { [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 4, 0), .flags = 0 }, - [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 400), + [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 600), .flags = MTHCA_FLAG_PCIE }, - [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 400), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE }, - [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 800), + [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 1, 0), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE | MTHCA_FLAG_SINAI_OPT } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 230ae21db8fd..265b1d1c4a62 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1287,11 +1287,7 @@ int mthca_register_device(struct mthca_dev *dev) (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; @@ -1316,6 +1312,11 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.modify_srq = mthca_modify_srq; dev->ib_dev.query_srq = mthca_query_srq; dev->ib_dev.destroy_srq = mthca_destroy_srq; + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); if (mthca_is_memfree(dev)) dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 8de2887ba15c..9a5bece3fa5c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -136,8 +136,8 @@ struct mthca_ah { * We have one global lock that protects dev->cq/qp_table. Each * struct mthca_cq/qp also has its own lock. An individual qp lock * may be taken inside of an individual cq lock. Both cqs attached to - * a qp may be locked, with the send cq locked first. No other - * nesting should be done. + * a qp may be locked, with the cq with the lower cqn locked first. + * No other nesting should be done. * * Each struct mthca_cq/qp also has an ref count, protected by the * corresponding table lock. The pointer from the cq/qp_table to the diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 16c387d8170c..2e8f6f36e0a5 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -99,6 +99,10 @@ enum { MTHCA_QP_BIT_RSC = 1 << 3 }; +enum { + MTHCA_SEND_DOORBELL_FENCE = 1 << 5 +}; + struct mthca_qp_path { __be32 port_pkey; u8 rnr_retry; @@ -222,9 +226,8 @@ static void *get_send_wqe(struct mthca_qp *qp, int n) (PAGE_SIZE - 1)); } -static void mthca_wq_init(struct mthca_wq *wq) +static void mthca_wq_reset(struct mthca_wq *wq) { - spin_lock_init(&wq->lock); wq->next_ind = 0; wq->last_comp = wq->max - 1; wq->head = 0; @@ -845,10 +848,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); - mthca_wq_init(&qp->sq); + mthca_wq_reset(&qp->sq); qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); - mthca_wq_init(&qp->rq); + mthca_wq_reset(&qp->rq); qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1); if (mthca_is_memfree(dev)) { @@ -1112,8 +1115,11 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, qp->atomic_rd_en = 0; qp->resp_depth = 0; qp->sq_policy = send_policy; - mthca_wq_init(&qp->sq); - mthca_wq_init(&qp->rq); + mthca_wq_reset(&qp->sq); + mthca_wq_reset(&qp->rq); + + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); ret = mthca_map_memfree(dev, qp); if (ret) @@ -1257,6 +1263,32 @@ int mthca_alloc_qp(struct mthca_dev *dev, return 0; } +static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq) +{ + if (send_cq == recv_cq) + spin_lock_irq(&send_cq->lock); + else if (send_cq->cqn < recv_cq->cqn) { + spin_lock_irq(&send_cq->lock); + spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_irq(&recv_cq->lock); + spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); + } +} + +static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq) +{ + if (send_cq == recv_cq) + spin_unlock_irq(&send_cq->lock); + else if (send_cq->cqn < recv_cq->cqn) { + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else { + spin_unlock(&send_cq->lock); + spin_unlock_irq(&recv_cq->lock); + } +} + int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, @@ -1309,17 +1341,13 @@ int mthca_alloc_sqp(struct mthca_dev *dev, * Lock CQs here, so that CQ polling code can do QP lookup * without taking a lock. */ - spin_lock_irq(&send_cq->lock); - if (send_cq != recv_cq) - spin_lock(&recv_cq->lock); + mthca_lock_cqs(send_cq, recv_cq); spin_lock(&dev->qp_table.lock); mthca_array_clear(&dev->qp_table.qp, mqpn); spin_unlock(&dev->qp_table.lock); - if (send_cq != recv_cq) - spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + mthca_unlock_cqs(send_cq, recv_cq); err_out: dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size, @@ -1353,9 +1381,7 @@ void mthca_free_qp(struct mthca_dev *dev, * Lock CQs here, so that CQ polling code can do QP lookup * without taking a lock. */ - spin_lock_irq(&send_cq->lock); - if (send_cq != recv_cq) - spin_lock(&recv_cq->lock); + mthca_lock_cqs(send_cq, recv_cq); spin_lock(&dev->qp_table.lock); mthca_array_clear(&dev->qp_table.qp, @@ -1363,9 +1389,7 @@ void mthca_free_qp(struct mthca_dev *dev, --qp->refcount; spin_unlock(&dev->qp_table.lock); - if (send_cq != recv_cq) - spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + mthca_unlock_cqs(send_cq, recv_cq); wait_event(qp->wait, !get_qp_refcount(dev, qp)); @@ -1500,7 +1524,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int i; int size; int size0 = 0; - u32 f0 = 0; + u32 f0; int ind; u8 op0 = 0; @@ -1684,6 +1708,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!size0) { size0 = size; op0 = mthca_opcode[wr->opcode]; + f0 = wr->send_flags & IB_SEND_FENCE ? + MTHCA_SEND_DOORBELL_FENCE : 0; } ++ind; @@ -1841,7 +1867,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int i; int size; int size0 = 0; - u32 f0 = 0; + u32 f0; int ind; u8 op0 = 0; @@ -2049,6 +2075,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!size0) { size0 = size; op0 = mthca_opcode[wr->opcode]; + f0 = wr->send_flags & IB_SEND_FENCE ? + MTHCA_SEND_DOORBELL_FENCE : 0; } ++ind; diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index fab417c5cf43..b60a9d79ae54 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -370,7 +370,8 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return -EINVAL; if (attr_mask & IB_SRQ_LIMIT) { - if (attr->srq_limit > srq->max) + u32 max_wr = mthca_is_memfree(dev) ? srq->max - 1 : srq->max; + if (attr->srq_limit > max_wr) return -EINVAL; mutex_lock(&srq->mutex); diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 13d6d01c72c0..d74653d7de1c 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -6,8 +6,7 @@ config INFINIBAND_IPOIB transports IP packets over InfiniBand so you can use your IB device as a fancy NIC. - The IPoIB protocol is defined by the IETF ipoib working - group: <http://www.ietf.org/html.charters/ipoib-charter.html>. + See Documentation/infiniband/ipoib.txt for more information config INFINIBAND_IPOIB_DEBUG bool "IP-over-InfiniBand debugging" if EMBEDDED diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 3f89f5e19036..474aa214ab57 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -212,6 +212,7 @@ struct ipoib_path { struct ipoib_neigh { struct ipoib_ah *ah; + union ib_gid dgid; struct sk_buff_head queue; struct neighbour *neighbour; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 1c6ea1c682a5..cf71d2a5515c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -404,6 +404,8 @@ static void path_rec_completion(int status, list_for_each_entry(neigh, &path->neigh_list, list) { kref_get(&path->ah->ref); neigh->ah = path->ah; + memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, + sizeof(union ib_gid)); while ((skb = __skb_dequeue(&neigh->queue))) __skb_queue_tail(&skqueue, skb); @@ -510,6 +512,8 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) if (path->ah) { kref_get(&path->ah->ref); neigh->ah = path->ah; + memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, + sizeof(union ib_gid)); ipoib_send(dev, skb, path->ah, be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); @@ -633,6 +637,25 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) neigh = *to_ipoib_neigh(skb->dst->neighbour); if (likely(neigh->ah)) { + if (unlikely(memcmp(&neigh->dgid.raw, + skb->dst->neighbour->ha + 4, + sizeof(union ib_gid)))) { + spin_lock(&priv->lock); + /* + * It's safe to call ipoib_put_ah() inside + * priv->lock here, because we know that + * path->ah will always hold one more reference, + * so ipoib_put_ah() will never do more than + * decrement the ref count. + */ + ipoib_put_ah(neigh->ah); + list_del(&neigh->list); + ipoib_neigh_free(neigh); + spin_unlock(&priv->lock); + ipoib_path_lookup(skb, dev); + goto out; + } + ipoib_send(dev, skb, neigh->ah, be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); goto out; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index ab40488182b3..ec356ce7cdcd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -264,6 +264,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, if (!ah) { ipoib_warn(priv, "ib_address_create failed\n"); } else { + spin_lock_irq(&priv->lock); + mcast->ah = ah; + spin_unlock_irq(&priv->lock); + ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT " AV %p, LID 0x%04x, SL %d\n", IPOIB_GID_ARG(mcast->mcmember.mgid), @@ -271,10 +275,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, be16_to_cpu(mcast->mcmember.mlid), mcast->mcmember.sl); } - - spin_lock_irq(&priv->lock); - mcast->ah = ah; - spin_unlock_irq(&priv->lock); } /* actually send any queued packets */ @@ -326,6 +326,7 @@ ipoib_mcast_sendonly_join_complete(int status, /* Clear the busy flag so we try again */ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + mcast->query = NULL; } complete(&mcast->done); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index b2c033edb03c..1437d7ee3b19 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -378,21 +378,6 @@ iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) return iser_conn_set_full_featured_mode(conn); } -static void -iscsi_iser_conn_terminate(struct iscsi_conn *conn) -{ - struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iser_conn *ib_conn = iser_conn->ib_conn; - - BUG_ON(!ib_conn); - /* starts conn teardown process, waits until all previously * - * posted buffers get flushed, deallocates all conn resources */ - iser_conn_terminate(ib_conn); - iser_conn->ib_conn = NULL; - conn->recv_lock = NULL; -} - - static struct iscsi_transport iscsi_iser_transport; static struct iscsi_cls_session * @@ -437,159 +422,50 @@ iscsi_iser_session_create(struct iscsi_transport *iscsit, } static int -iscsi_iser_conn_set_param(struct iscsi_cls_conn *cls_conn, - enum iscsi_param param, uint32_t value) +iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, + enum iscsi_param param, char *buf, int buflen) { - struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_session *session = conn->session; - - spin_lock_bh(&session->lock); - if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE && - conn->stop_stage != STOP_CONN_RECOVER) { - printk(KERN_ERR "iscsi_iser: can not change parameter [%d]\n", - param); - spin_unlock_bh(&session->lock); - return 0; - } - spin_unlock_bh(&session->lock); + int value; switch (param) { case ISCSI_PARAM_MAX_RECV_DLENGTH: /* TBD */ break; - case ISCSI_PARAM_MAX_XMIT_DLENGTH: - conn->max_xmit_dlength = value; - break; case ISCSI_PARAM_HDRDGST_EN: + sscanf(buf, "%d", &value); if (value) { printk(KERN_ERR "DataDigest wasn't negotiated to None"); return -EPROTO; } break; case ISCSI_PARAM_DATADGST_EN: + sscanf(buf, "%d", &value); if (value) { printk(KERN_ERR "DataDigest wasn't negotiated to None"); return -EPROTO; } break; - case ISCSI_PARAM_INITIAL_R2T_EN: - session->initial_r2t_en = value; - break; - case ISCSI_PARAM_IMM_DATA_EN: - session->imm_data_en = value; - break; - case ISCSI_PARAM_FIRST_BURST: - session->first_burst = value; - break; - case ISCSI_PARAM_MAX_BURST: - session->max_burst = value; - break; - case ISCSI_PARAM_PDU_INORDER_EN: - session->pdu_inorder_en = value; - break; - case ISCSI_PARAM_DATASEQ_INORDER_EN: - session->dataseq_inorder_en = value; - break; - case ISCSI_PARAM_ERL: - session->erl = value; - break; case ISCSI_PARAM_IFMARKER_EN: + sscanf(buf, "%d", &value); if (value) { printk(KERN_ERR "IFMarker wasn't negotiated to No"); return -EPROTO; } break; case ISCSI_PARAM_OFMARKER_EN: + sscanf(buf, "%d", &value); if (value) { printk(KERN_ERR "OFMarker wasn't negotiated to No"); return -EPROTO; } break; default: - break; + return iscsi_set_param(cls_conn, param, buf, buflen); } return 0; } -static int -iscsi_iser_session_get_param(struct iscsi_cls_session *cls_session, - enum iscsi_param param, uint32_t *value) -{ - struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); - struct iscsi_session *session = iscsi_hostdata(shost->hostdata); - - switch (param) { - case ISCSI_PARAM_INITIAL_R2T_EN: - *value = session->initial_r2t_en; - break; - case ISCSI_PARAM_MAX_R2T: - *value = session->max_r2t; - break; - case ISCSI_PARAM_IMM_DATA_EN: - *value = session->imm_data_en; - break; - case ISCSI_PARAM_FIRST_BURST: - *value = session->first_burst; - break; - case ISCSI_PARAM_MAX_BURST: - *value = session->max_burst; - break; - case ISCSI_PARAM_PDU_INORDER_EN: - *value = session->pdu_inorder_en; - break; - case ISCSI_PARAM_DATASEQ_INORDER_EN: - *value = session->dataseq_inorder_en; - break; - case ISCSI_PARAM_ERL: - *value = session->erl; - break; - case ISCSI_PARAM_IFMARKER_EN: - *value = 0; - break; - case ISCSI_PARAM_OFMARKER_EN: - *value = 0; - break; - default: - return ISCSI_ERR_PARAM_NOT_FOUND; - } - - return 0; -} - -static int -iscsi_iser_conn_get_param(struct iscsi_cls_conn *cls_conn, - enum iscsi_param param, uint32_t *value) -{ - struct iscsi_conn *conn = cls_conn->dd_data; - - switch(param) { - case ISCSI_PARAM_MAX_RECV_DLENGTH: - *value = conn->max_recv_dlength; - break; - case ISCSI_PARAM_MAX_XMIT_DLENGTH: - *value = conn->max_xmit_dlength; - break; - case ISCSI_PARAM_HDRDGST_EN: - *value = 0; - break; - case ISCSI_PARAM_DATADGST_EN: - *value = 0; - break; - /*case ISCSI_PARAM_TARGET_RECV_DLENGTH: - *value = conn->target_recv_dlength; - break; - case ISCSI_PARAM_INITIATOR_RECV_DLENGTH: - *value = conn->initiator_recv_dlength; - break;*/ - default: - return ISCSI_ERR_PARAM_NOT_FOUND; - } - - return 0; -} - - static void iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats) { @@ -664,13 +540,13 @@ iscsi_iser_ep_poll(__u64 ep_handle, int timeout_ms) static void iscsi_iser_ep_disconnect(__u64 ep_handle) { - struct iser_conn *ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); + struct iser_conn *ib_conn; + ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); if (!ib_conn) return; iser_err("ib conn %p state %d\n",ib_conn, ib_conn->state); - iser_conn_terminate(ib_conn); } @@ -701,7 +577,12 @@ static struct iscsi_transport iscsi_iser_transport = { ISCSI_FIRST_BURST | ISCSI_MAX_BURST | ISCSI_PDU_INORDER_EN | - ISCSI_DATASEQ_INORDER_EN, + ISCSI_DATASEQ_INORDER_EN | + ISCSI_EXP_STATSN | + ISCSI_PERSISTENT_PORT | + ISCSI_PERSISTENT_ADDRESS | + ISCSI_TARGET_NAME | + ISCSI_TPGT, .host_template = &iscsi_iser_sht, .conndata_size = sizeof(struct iscsi_conn), .max_lun = ISCSI_ISER_MAX_LUN, @@ -713,14 +594,11 @@ static struct iscsi_transport iscsi_iser_transport = { .create_conn = iscsi_iser_conn_create, .bind_conn = iscsi_iser_conn_bind, .destroy_conn = iscsi_iser_conn_destroy, - .set_param = iscsi_iser_conn_set_param, - .get_conn_param = iscsi_iser_conn_get_param, - .get_session_param = iscsi_iser_session_get_param, + .set_param = iscsi_iser_set_param, + .get_conn_param = iscsi_conn_get_param, + .get_session_param = iscsi_session_get_param, .start_conn = iscsi_iser_conn_start, .stop_conn = iscsi_conn_stop, - /* these are called as part of conn recovery */ - .suspend_conn_recv = NULL, /* FIXME is/how this relvant to iser? */ - .terminate_conn = iscsi_iser_conn_terminate, /* IO */ .send_pdu = iscsi_conn_send_pdu, .get_stats = iscsi_iser_conn_get_stats, diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ff117bbf81b4..72febf1f8ff8 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -594,7 +594,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool, page_list, page_vec->length, - &io_addr); + io_addr); if (IS_ERR(mem)) { status = (int)PTR_ERR(mem); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 4e22afef7206..fd8344cdc0db 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -77,6 +77,14 @@ MODULE_PARM_DESC(topspin_workarounds, static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad }; +static int mellanox_workarounds = 1; + +module_param(mellanox_workarounds, int, 0444); +MODULE_PARM_DESC(mellanox_workarounds, + "Enable workarounds for Mellanox SRP target bugs if != 0"); + +static const u8 mellanox_oui[3] = { 0x00, 0x02, 0xc9 }; + static void srp_add_one(struct ib_device *device); static void srp_remove_one(struct ib_device *device); static void srp_completion(struct ib_cq *cq, void *target_ptr); @@ -526,8 +534,10 @@ static int srp_reconnect_target(struct srp_target_port *target) while (ib_poll_cq(target->cq, 1, &wc) > 0) ; /* nothing */ + spin_lock_irq(target->scsi_host->host_lock); list_for_each_entry_safe(req, tmp, &target->req_queue, list) srp_reset_req(target, req); + spin_unlock_irq(target->scsi_host->host_lock); target->rx_head = 0; target->tx_head = 0; @@ -567,7 +577,7 @@ err: return ret; } -static int srp_map_fmr(struct srp_device *dev, struct scatterlist *scat, +static int srp_map_fmr(struct srp_target_port *target, struct scatterlist *scat, int sg_cnt, struct srp_request *req, struct srp_direct_buf *buf) { @@ -577,10 +587,15 @@ static int srp_map_fmr(struct srp_device *dev, struct scatterlist *scat, int page_cnt; int i, j; int ret; + struct srp_device *dev = target->srp_host->dev; if (!dev->fmr_pool) return -ENODEV; + if ((sg_dma_address(&scat[0]) & ~dev->fmr_page_mask) && + mellanox_workarounds && !memcmp(&target->ioc_guid, mellanox_oui, 3)) + return -EINVAL; + len = page_cnt = 0; for (i = 0; i < sg_cnt; ++i) { if (sg_dma_address(&scat[i]) & ~dev->fmr_page_mask) { @@ -615,9 +630,10 @@ static int srp_map_fmr(struct srp_device *dev, struct scatterlist *scat, (sg_dma_address(&scat[i]) & dev->fmr_page_mask) + j; req->fmr = ib_fmr_pool_map_phys(dev->fmr_pool, - dma_pages, page_cnt, &io_addr); + dma_pages, page_cnt, io_addr); if (IS_ERR(req->fmr)) { ret = PTR_ERR(req->fmr); + req->fmr = NULL; goto out; } @@ -682,7 +698,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, buf->va = cpu_to_be64(sg_dma_address(scat)); buf->key = cpu_to_be32(target->srp_host->dev->mr->rkey); buf->len = cpu_to_be32(sg_dma_len(scat)); - } else if (srp_map_fmr(target->srp_host->dev, scat, count, req, + } else if (srp_map_fmr(target, scat, count, req, (void *) cmd->add_data)) { /* * FMR mapping failed, and the scatterlist has more @@ -783,13 +799,6 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) spin_unlock_irqrestore(target->scsi_host->host_lock, flags); } -static void srp_reconnect_work(void *target_ptr) -{ - struct srp_target_port *target = target_ptr; - - srp_reconnect_target(target); -} - static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) { struct srp_iu *iu; @@ -842,7 +851,6 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr) { struct srp_target_port *target = target_ptr; struct ib_wc wc; - unsigned long flags; ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); while (ib_poll_cq(cq, 1, &wc) > 0) { @@ -850,10 +858,6 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr) printk(KERN_ERR PFX "failed %s status %d\n", wc.wr_id & SRP_OP_RECV ? "receive" : "send", wc.status); - spin_lock_irqsave(target->scsi_host->host_lock, flags); - if (target->state == SRP_TARGET_LIVE) - schedule_work(&target->work); - spin_unlock_irqrestore(target->scsi_host->host_lock, flags); break; } @@ -1689,8 +1693,6 @@ static ssize_t srp_create_target(struct class_device *class_dev, target->scsi_host = target_host; target->srp_host = host; - INIT_WORK(&target->work, srp_reconnect_work, target); - INIT_LIST_HEAD(&target->free_reqs); INIT_LIST_HEAD(&target->req_queue); for (i = 0; i < SRP_SQ_SIZE; ++i) { |