diff options
author | Michal Marek <mmarek@suse.cz> | 2010-12-14 22:01:55 +0100 |
---|---|---|
committer | Michal Marek <mmarek@suse.cz> | 2010-12-14 22:01:55 +0100 |
commit | 8990c1bc4be46473ad19bf2fa612ca57286f3df4 (patch) | |
tree | 3cea60576903a1d26c67e6ec62891b524d390e95 /drivers/infiniband | |
parent | 2979076fbf17a0947d6eba367b0cac19c907c160 (diff) | |
parent | c8ddb2713c624f432fa5fe3c7ecffcdda46ea0d4 (diff) |
Merge commit 'v2.6.37-rc1' into kbuild/kbuild
Diffstat (limited to 'drivers/infiniband')
96 files changed, 2992 insertions, 1246 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 89d70de5e235..6e35eccc9caa 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -16,7 +16,7 @@ config INFINIBAND_USER_MAD Userspace InfiniBand Management Datagram (MAD) support. This is the kernel side of the userspace MAD support, which allows userspace processes to send and receive MADs. You will also - need libibumad from <http://www.openib.org>. + need libibumad from <http://www.openfabrics.org/downloads/management/>. config INFINIBAND_USER_ACCESS tristate "InfiniBand userspace access (verbs and CM)" @@ -28,7 +28,7 @@ config INFINIBAND_USER_ACCESS to set up connections and directly access InfiniBand hardware for fast-path operations. You will also need libibverbs, libibcm and a hardware driver library from - <http://www.openib.org>. + <http://www.openfabrics.org/git/>. config INFINIBAND_USER_MEM bool diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 0b926e45afe2..a5ea1bce9689 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -215,7 +215,7 @@ static int addr4_resolve(struct sockaddr_in *src_in, neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); if (!neigh || !(neigh->nud_state & NUD_VALID)) { - neigh_event_send(rt->u.dst.neighbour, NULL); + neigh_event_send(rt->dst.neighbour, NULL); ret = -ENODATA; if (neigh) goto release; diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index ae7c2880e624..91916a8d5de4 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -59,8 +59,8 @@ __ib_get_agent_port(struct ib_device *device, int port_num) struct ib_agent_port_private *entry; list_for_each_entry(entry, &ib_agent_port_list, port_list) { - if (entry->agent[0]->device == device && - entry->agent[0]->port_num == port_num) + if (entry->agent[1]->device == device && + entry->agent[1]->port_num == port_num) return entry; } return NULL; @@ -155,14 +155,16 @@ int ib_agent_port_open(struct ib_device *device, int port_num) goto error1; } - /* Obtain send only MAD agent for SMI QP */ - port_priv->agent[0] = ib_register_mad_agent(device, port_num, - IB_QPT_SMI, NULL, 0, - &agent_send_handler, - NULL, NULL); - if (IS_ERR(port_priv->agent[0])) { - ret = PTR_ERR(port_priv->agent[0]); - goto error2; + if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND) { + /* Obtain send only MAD agent for SMI QP */ + port_priv->agent[0] = ib_register_mad_agent(device, port_num, + IB_QPT_SMI, NULL, 0, + &agent_send_handler, + NULL, NULL); + if (IS_ERR(port_priv->agent[0])) { + ret = PTR_ERR(port_priv->agent[0]); + goto error2; + } } /* Obtain send only MAD agent for GSI QP */ @@ -182,7 +184,8 @@ int ib_agent_port_open(struct ib_device *device, int port_num) return 0; error3: - ib_unregister_mad_agent(port_priv->agent[0]); + if (port_priv->agent[0]) + ib_unregister_mad_agent(port_priv->agent[0]); error2: kfree(port_priv); error1: @@ -205,7 +208,9 @@ int ib_agent_port_close(struct ib_device *device, int port_num) spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); ib_unregister_mad_agent(port_priv->agent[1]); - ib_unregister_mad_agent(port_priv->agent[0]); + if (port_priv->agent[0]) + ib_unregister_mad_agent(port_priv->agent[0]); + kfree(port_priv); return 0; } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index ad63b79afac1..64e0903091a8 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -2409,10 +2409,12 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, msg_response = CM_MSG_RESPONSE_REP; break; case IB_CM_ESTABLISHED: - cm_state = cm_id->state; - lap_state = IB_CM_MRA_LAP_SENT; - msg_response = CM_MSG_RESPONSE_OTHER; - break; + if (cm_id->lap_state == IB_CM_LAP_RCVD) { + cm_state = cm_id->state; + lap_state = IB_CM_MRA_LAP_SENT; + msg_response = CM_MSG_RESPONSE_OTHER; + break; + } default: ret = -EINVAL; goto error1; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index b930b8110a63..6884da24fde1 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -59,6 +59,7 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) +#define CMA_IBOE_PACKET_LIFETIME 18 static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); @@ -157,6 +158,7 @@ struct cma_multicast { struct list_head list; void *context; struct sockaddr_storage addr; + struct kref mcref; }; struct cma_work { @@ -173,6 +175,12 @@ struct cma_ndev_work { struct rdma_cm_event event; }; +struct iboe_mcast_work { + struct work_struct work; + struct rdma_id_private *id; + struct cma_multicast *mc; +}; + union cma_ip_addr { struct in6_addr ip6; struct { @@ -281,6 +289,8 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv, atomic_inc(&cma_dev->refcount); id_priv->cma_dev = cma_dev; id_priv->id.device = cma_dev->device; + id_priv->id.route.addr.dev_addr.transport = + rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); } @@ -290,6 +300,14 @@ static inline void cma_deref_dev(struct cma_device *cma_dev) complete(&cma_dev->comp); } +static inline void release_mc(struct kref *kref) +{ + struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); + + kfree(mc->multicast.ib); + kfree(mc); +} + static void cma_detach_from_dev(struct rdma_id_private *id_priv) { list_del(&id_priv->list); @@ -323,22 +341,63 @@ static int cma_set_qkey(struct rdma_id_private *id_priv) return ret; } +static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num) +{ + int i; + int err; + struct ib_port_attr props; + union ib_gid tmp; + + err = ib_query_port(device, port_num, &props); + if (err) + return 1; + + for (i = 0; i < props.gid_tbl_len; ++i) { + err = ib_query_gid(device, port_num, i, &tmp); + if (err) + return 1; + if (!memcmp(&tmp, gid, sizeof tmp)) + return 0; + } + + return -EAGAIN; +} + static int cma_acquire_dev(struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; - union ib_gid gid; + union ib_gid gid, iboe_gid; int ret = -ENODEV; + u8 port; + enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? + IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; - rdma_addr_get_sgid(dev_addr, &gid); + iboe_addr_get_sgid(dev_addr, &iboe_gid); + memcpy(&gid, dev_addr->src_dev_addr + + rdma_addr_gid_offset(dev_addr), sizeof gid); list_for_each_entry(cma_dev, &dev_list, list) { - ret = ib_find_cached_gid(cma_dev->device, &gid, - &id_priv->id.port_num, NULL); - if (!ret) { - cma_attach_to_dev(id_priv, cma_dev); - break; + for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { + if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { + if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && + rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) + ret = find_gid_port(cma_dev->device, &iboe_gid, port); + else + ret = find_gid_port(cma_dev->device, &gid, port); + + if (!ret) { + id_priv->id.port_num = port; + goto out; + } else if (ret == 1) + break; + } } } + +out: + if (!ret) + cma_attach_to_dev(id_priv, cma_dev); + return ret; } @@ -556,10 +615,16 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int ret; + u16 pkey; + + if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) == + IB_LINK_LAYER_INFINIBAND) + pkey = ib_addr_get_pkey(dev_addr); + else + pkey = 0xffff; ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, - ib_addr_get_pkey(dev_addr), - &qp_attr->pkey_index); + pkey, &qp_attr->pkey_index); if (ret) return ret; @@ -737,8 +802,8 @@ static inline int cma_user_data_offset(enum rdma_port_space ps) static void cma_cancel_route(struct rdma_id_private *id_priv) { - switch (rdma_node_get_transport(id_priv->id.device->node_type)) { - case RDMA_TRANSPORT_IB: + switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) { + case IB_LINK_LAYER_INFINIBAND: if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); break; @@ -816,8 +881,17 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) mc = container_of(id_priv->mc_list.next, struct cma_multicast, list); list_del(&mc->list); - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); + switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + break; + case IB_LINK_LAYER_ETHERNET: + kref_put(&mc->mcref, release_mc); + break; + default: + break; + } } } @@ -833,7 +907,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) mutex_lock(&lock); if (id_priv->cma_dev) { mutex_unlock(&lock); - switch (rdma_node_get_transport(id->device->node_type)) { + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) ib_destroy_cm_id(id_priv->cm_id.ib); @@ -1708,6 +1782,81 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) return 0; } +static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) +{ + struct rdma_route *route = &id_priv->id.route; + struct rdma_addr *addr = &route->addr; + struct cma_work *work; + int ret; + struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr; + struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr; + struct net_device *ndev = NULL; + u16 vid; + + if (src_addr->sin_family != dst_addr->sin_family) + return -EINVAL; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + + route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); + if (!route->path_rec) { + ret = -ENOMEM; + goto err1; + } + + route->num_paths = 1; + + if (addr->dev_addr.bound_dev_if) + ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); + if (!ndev) { + ret = -ENODEV; + goto err2; + } + + vid = rdma_vlan_dev_vlan_id(ndev); + + iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid); + iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid); + + route->path_rec->hop_limit = 1; + route->path_rec->reversible = 1; + route->path_rec->pkey = cpu_to_be16(0xffff); + route->path_rec->mtu_selector = IB_SA_EQ; + route->path_rec->sl = id_priv->tos >> 5; + + route->path_rec->mtu = iboe_get_mtu(ndev->mtu); + route->path_rec->rate_selector = IB_SA_EQ; + route->path_rec->rate = iboe_get_rate(ndev); + dev_put(ndev); + route->path_rec->packet_life_time_selector = IB_SA_EQ; + route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; + if (!route->path_rec->mtu) { + ret = -EINVAL; + goto err2; + } + + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + work->event.status = 0; + + queue_work(cma_wq, &work->work); + + return 0; + +err2: + kfree(route->path_rec); + route->path_rec = NULL; +err1: + kfree(work); + return ret; +} + int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { struct rdma_id_private *id_priv; @@ -1720,7 +1869,16 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) atomic_inc(&id_priv->refcount); switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - ret = cma_resolve_ib_route(id_priv, timeout_ms); + switch (rdma_port_get_link_layer(id->device, id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ret = cma_resolve_ib_route(id_priv, timeout_ms); + break; + case IB_LINK_LAYER_ETHERNET: + ret = cma_resolve_iboe_route(id_priv); + break; + default: + ret = -ENOSYS; + } break; case RDMA_TRANSPORT_IWARP: ret = cma_resolve_iw_route(id_priv, timeout_ms); @@ -1773,7 +1931,7 @@ port_found: goto out; id_priv->id.route.addr.dev_addr.dev_type = - (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ? + (rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ? ARPHRD_INFINIBAND : ARPHRD_ETHER; rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); @@ -2758,6 +2916,102 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, return 0; } +static void iboe_mcast_work_handler(struct work_struct *work) +{ + struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); + struct cma_multicast *mc = mw->mc; + struct ib_sa_multicast *m = mc->multicast.ib; + + mc->multicast.ib->context = mc; + cma_ib_mc_handler(0, m); + kref_put(&mc->mcref, release_mc); + kfree(mw); +} + +static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; + + if (cma_any_addr(addr)) { + memset(mgid, 0, sizeof *mgid); + } else if (addr->sa_family == AF_INET6) { + memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); + } else { + mgid->raw[0] = 0xff; + mgid->raw[1] = 0x0e; + mgid->raw[2] = 0; + mgid->raw[3] = 0; + mgid->raw[4] = 0; + mgid->raw[5] = 0; + mgid->raw[6] = 0; + mgid->raw[7] = 0; + mgid->raw[8] = 0; + mgid->raw[9] = 0; + mgid->raw[10] = 0xff; + mgid->raw[11] = 0xff; + *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; + } +} + +static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, + struct cma_multicast *mc) +{ + struct iboe_mcast_work *work; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + int err; + struct sockaddr *addr = (struct sockaddr *)&mc->addr; + struct net_device *ndev = NULL; + + if (cma_zero_addr((struct sockaddr *)&mc->addr)) + return -EINVAL; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); + if (!mc->multicast.ib) { + err = -ENOMEM; + goto out1; + } + + cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid); + + mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); + if (id_priv->id.ps == RDMA_PS_UDP) + mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (!ndev) { + err = -ENODEV; + goto out2; + } + mc->multicast.ib->rec.rate = iboe_get_rate(ndev); + mc->multicast.ib->rec.hop_limit = 1; + mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu); + dev_put(ndev); + if (!mc->multicast.ib->rec.mtu) { + err = -EINVAL; + goto out2; + } + iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid); + work->id = id_priv; + work->mc = mc; + INIT_WORK(&work->work, iboe_mcast_work_handler); + kref_get(&mc->mcref); + queue_work(cma_wq, &work->work); + + return 0; + +out2: + kfree(mc->multicast.ib); +out1: + kfree(work); + return err; +} + int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, void *context) { @@ -2784,7 +3038,17 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - ret = cma_join_ib_multicast(id_priv, mc); + switch (rdma_port_get_link_layer(id->device, id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ret = cma_join_ib_multicast(id_priv, mc); + break; + case IB_LINK_LAYER_ETHERNET: + kref_init(&mc->mcref); + ret = cma_iboe_join_multicast(id_priv, mc); + break; + default: + ret = -EINVAL; + } break; default: ret = -ENOSYS; @@ -2817,8 +3081,19 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, mc->multicast.ib->rec.mlid); - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); + if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) { + switch (rdma_port_get_link_layer(id->device, id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + break; + case IB_LINK_LAYER_ETHERNET: + kref_put(&mc->mcref, release_mc); + break; + default: + break; + } + } return; } } diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index bfead5bc25f6..2a1e9ae134b4 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -506,6 +506,8 @@ int iw_cm_accept(struct iw_cm_id *cm_id, qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id->device->iwcm->add_ref(qp); @@ -565,6 +567,8 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id->device->iwcm->add_ref(qp); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index ef1304f151dc..822cfdcd9f78 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2598,6 +2598,9 @@ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) struct ib_mad_private *recv; struct ib_mad_list_head *mad_list; + if (!qp_info->qp) + return; + while (!list_empty(&qp_info->recv_queue.list)) { mad_list = list_entry(qp_info->recv_queue.list.next, @@ -2639,6 +2642,9 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) for (i = 0; i < IB_MAD_QPS_CORE; i++) { qp = port_priv->qp_info[i].qp; + if (!qp) + continue; + /* * PKey index for QP1 is irrelevant but * one is needed for the Reset to Init transition @@ -2680,6 +2686,9 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) } for (i = 0; i < IB_MAD_QPS_CORE; i++) { + if (!port_priv->qp_info[i].qp) + continue; + ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); if (ret) { printk(KERN_ERR PFX "Couldn't post receive WRs\n"); @@ -2758,6 +2767,9 @@ error: static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) { + if (!qp_info->qp) + return; + ib_destroy_qp(qp_info->qp); kfree(qp_info->snoop_table); } @@ -2773,6 +2785,7 @@ static int ib_mad_port_open(struct ib_device *device, struct ib_mad_port_private *port_priv; unsigned long flags; char name[sizeof "ib_mad123"]; + int has_smi; /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); @@ -2788,7 +2801,11 @@ static int ib_mad_port_open(struct ib_device *device, init_mad_qp(port_priv, &port_priv->qp_info[0]); init_mad_qp(port_priv, &port_priv->qp_info[1]); - cq_size = (mad_sendq_size + mad_recvq_size) * 2; + cq_size = mad_sendq_size + mad_recvq_size; + has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND; + if (has_smi) + cq_size *= 2; + port_priv->cq = ib_create_cq(port_priv->device, ib_mad_thread_completion_handler, NULL, port_priv, cq_size, 0); @@ -2812,9 +2829,11 @@ static int ib_mad_port_open(struct ib_device *device, goto error5; } - ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); - if (ret) - goto error6; + if (has_smi) { + ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); + if (ret) + goto error6; + } ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); if (ret) goto error7; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index a519801dcfb7..68b4162fd9d2 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -774,6 +774,10 @@ static void mcast_event_handler(struct ib_event_handler *handler, int index; dev = container_of(handler, struct mcast_device, event_handler); + if (rdma_port_get_link_layer(dev->device, event->element.port_num) != + IB_LINK_LAYER_INFINIBAND) + return; + index = event->element.port_num - dev->start_port; switch (event->event) { @@ -796,6 +800,7 @@ static void mcast_add_one(struct ib_device *device) struct mcast_device *dev; struct mcast_port *port; int i; + int count = 0; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; @@ -813,6 +818,9 @@ static void mcast_add_one(struct ib_device *device) } for (i = 0; i <= dev->end_port - dev->start_port; i++) { + if (rdma_port_get_link_layer(device, dev->start_port + i) != + IB_LINK_LAYER_INFINIBAND) + continue; port = &dev->port[i]; port->dev = dev; port->port_num = dev->start_port + i; @@ -820,6 +828,12 @@ static void mcast_add_one(struct ib_device *device) port->table = RB_ROOT; init_completion(&port->comp); atomic_set(&port->refcount, 1); + ++count; + } + + if (!count) { + kfree(dev); + return; } dev->device = device; @@ -843,9 +857,12 @@ static void mcast_remove_one(struct ib_device *device) flush_workqueue(mcast_wq); for (i = 0; i <= dev->end_port - dev->start_port; i++) { - port = &dev->port[i]; - deref_port(port); - wait_for_completion(&port->comp); + if (rdma_port_get_link_layer(device, dev->start_port + i) == + IB_LINK_LAYER_INFINIBAND) { + port = &dev->port[i]; + deref_port(port); + wait_for_completion(&port->comp); + } } kfree(dev); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 7e1ffd8ccd5c..91a660310b7c 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -416,6 +416,9 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event struct ib_sa_port *port = &sa_dev->port[event->element.port_num - sa_dev->start_port]; + if (rdma_port_get_link_layer(handler->device, port->port_num) != IB_LINK_LAYER_INFINIBAND) + return; + spin_lock_irqsave(&port->ah_lock, flags); if (port->sm_ah) kref_put(&port->sm_ah->ref, free_sm_ah); @@ -493,6 +496,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, { int ret; u16 gid_index; + int force_grh; memset(ah_attr, 0, sizeof *ah_attr); ah_attr->dlid = be16_to_cpu(rec->dlid); @@ -502,7 +506,9 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, ah_attr->port_num = port_num; ah_attr->static_rate = rec->rate; - if (rec->hop_limit > 1) { + force_grh = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_ETHERNET; + + if (rec->hop_limit > 1 || force_grh) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = rec->dgid; @@ -1007,7 +1013,7 @@ static void ib_sa_add_one(struct ib_device *device) e = device->phys_port_cnt; } - sa_dev = kmalloc(sizeof *sa_dev + + sa_dev = kzalloc(sizeof *sa_dev + (e - s + 1) * sizeof (struct ib_sa_port), GFP_KERNEL); if (!sa_dev) @@ -1017,9 +1023,12 @@ static void ib_sa_add_one(struct ib_device *device) sa_dev->end_port = e; for (i = 0; i <= e - s; ++i) { + spin_lock_init(&sa_dev->port[i].ah_lock); + if (rdma_port_get_link_layer(device, i + 1) != IB_LINK_LAYER_INFINIBAND) + continue; + sa_dev->port[i].sm_ah = NULL; sa_dev->port[i].port_num = i + s; - spin_lock_init(&sa_dev->port[i].ah_lock); sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, @@ -1045,13 +1054,15 @@ static void ib_sa_add_one(struct ib_device *device) goto err; for (i = 0; i <= e - s; ++i) - update_sm_ah(&sa_dev->port[i].update_task); + if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) + update_sm_ah(&sa_dev->port[i].update_task); return; err: while (--i >= 0) - ib_unregister_mad_agent(sa_dev->port[i].agent); + if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) + ib_unregister_mad_agent(sa_dev->port[i].agent); kfree(sa_dev); @@ -1071,9 +1082,12 @@ static void ib_sa_remove_one(struct ib_device *device) flush_scheduled_work(); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { - ib_unregister_mad_agent(sa_dev->port[i].agent); - if (sa_dev->port[i].sm_ah) - kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); + if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) { + ib_unregister_mad_agent(sa_dev->port[i].agent); + if (sa_dev->port[i].sm_ah) + kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); + } + } kfree(sa_dev); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 3627300e2a10..9ab5df72df7b 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -222,6 +222,19 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, } } +static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, + char *buf) +{ + switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + return sprintf(buf, "%s\n", "InfiniBand"); + case IB_LINK_LAYER_ETHERNET: + return sprintf(buf, "%s\n", "Ethernet"); + default: + return sprintf(buf, "%s\n", "Unknown"); + } +} + static PORT_ATTR_RO(state); static PORT_ATTR_RO(lid); static PORT_ATTR_RO(lid_mask_count); @@ -230,6 +243,7 @@ static PORT_ATTR_RO(sm_sl); static PORT_ATTR_RO(cap_mask); static PORT_ATTR_RO(rate); static PORT_ATTR_RO(phys_state); +static PORT_ATTR_RO(link_layer); static struct attribute *port_default_attrs[] = { &port_attr_state.attr, @@ -240,6 +254,7 @@ static struct attribute *port_default_attrs[] = { &port_attr_cap_mask.attr, &port_attr_rate.attr, &port_attr_phys_state.attr, + &port_attr_link_layer.attr, NULL }; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ac7edc24165c..ca12acf38379 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -40,6 +40,7 @@ #include <linux/in6.h> #include <linux/miscdevice.h> #include <linux/slab.h> +#include <linux/sysctl.h> #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> @@ -50,8 +51,24 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); MODULE_LICENSE("Dual BSD/GPL"); -enum { - UCMA_MAX_BACKLOG = 128 +static unsigned int max_backlog = 1024; + +static struct ctl_table_header *ucma_ctl_table_hdr; +static ctl_table ucma_ctl_table[] = { + { + .procname = "max_backlog", + .data = &max_backlog, + .maxlen = sizeof max_backlog, + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static struct ctl_path ucma_ctl_path[] = { + { .procname = "net" }, + { .procname = "rdma_ucm" }, + { } }; struct ucma_file { @@ -583,6 +600,42 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, } } +static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, + struct rdma_route *route) +{ + struct rdma_dev_addr *dev_addr; + struct net_device *dev; + u16 vid = 0; + + resp->num_paths = route->num_paths; + switch (route->num_paths) { + case 0: + dev_addr = &route->addr.dev_addr; + dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (dev) { + vid = rdma_vlan_dev_vlan_id(dev); + dev_put(dev); + } + + iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid, + dev_addr->dst_dev_addr, vid); + iboe_addr_get_sgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].sgid); + resp->ib_route[0].pkey = cpu_to_be16(0xffff); + break; + case 2: + ib_copy_path_rec_to_user(&resp->ib_route[1], + &route->path_rec[1]); + /* fall through */ + case 1: + ib_copy_path_rec_to_user(&resp->ib_route[0], + &route->path_rec[0]); + break; + default: + break; + } +} + static ssize_t ucma_query_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) @@ -617,12 +670,17 @@ static ssize_t ucma_query_route(struct ucma_file *file, resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; resp.port_num = ctx->cm_id->port_num; - switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) { - case RDMA_TRANSPORT_IB: - ucma_copy_ib_route(&resp, &ctx->cm_id->route); - break; - default: - break; + if (rdma_node_get_transport(ctx->cm_id->device->node_type) == RDMA_TRANSPORT_IB) { + switch (rdma_port_get_link_layer(ctx->cm_id->device, ctx->cm_id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ucma_copy_ib_route(&resp, &ctx->cm_id->route); + break; + case IB_LINK_LAYER_ETHERNET: + ucma_copy_iboe_route(&resp, &ctx->cm_id->route); + break; + default: + break; + } } out: @@ -686,8 +744,8 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - ctx->backlog = cmd.backlog > 0 && cmd.backlog < UCMA_MAX_BACKLOG ? - cmd.backlog : UCMA_MAX_BACKLOG; + ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ? + cmd.backlog : max_backlog; ret = rdma_listen(ctx->cm_id, ctx->backlog); ucma_put_ctx(ctx); return ret; @@ -1279,16 +1337,26 @@ static int __init ucma_init(void) ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); if (ret) { printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n"); - goto err; + goto err1; + } + + ucma_ctl_table_hdr = register_sysctl_paths(ucma_ctl_path, ucma_ctl_table); + if (!ucma_ctl_table_hdr) { + printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n"); + ret = -ENOMEM; + goto err2; } return 0; -err: +err2: + device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); +err1: misc_deregister(&ucma_misc); return ret; } static void __exit ucma_cleanup(void) { + unregister_sysctl_table(ucma_ctl_table_hdr); device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); idr_destroy(&ctx_idr); diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 650b501eb142..bb7e19280821 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -33,6 +33,7 @@ #include <linux/errno.h> #include <linux/string.h> +#include <linux/if_ether.h> #include <rdma/ib_pack.h> @@ -80,6 +81,40 @@ static const struct ib_field lrh_table[] = { .size_bits = 16 } }; +static const struct ib_field eth_table[] = { + { STRUCT_FIELD(eth, dmac_h), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 32 }, + { STRUCT_FIELD(eth, dmac_l), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(eth, smac_h), + .offset_words = 1, + .offset_bits = 16, + .size_bits = 16 }, + { STRUCT_FIELD(eth, smac_l), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 32 }, + { STRUCT_FIELD(eth, type), + .offset_words = 3, + .offset_bits = 0, + .size_bits = 16 } +}; + +static const struct ib_field vlan_table[] = { + { STRUCT_FIELD(vlan, tag), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(vlan, type), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 } +}; + static const struct ib_field grh_table[] = { { STRUCT_FIELD(grh, ip_version), .offset_words = 0, @@ -180,38 +215,43 @@ static const struct ib_field deth_table[] = { /** * ib_ud_header_init - Initialize UD header structure * @payload_bytes:Length of packet payload + * @lrh_present: specify if LRH is present + * @eth_present: specify if Eth header is present + * @vlan_present: packet is tagged vlan * @grh_present:GRH flag (if non-zero, GRH will be included) - * @immediate_present: specify if immediate data should be used + * @immediate_present: specify if immediate data is present * @header:Structure to initialize - * - * ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header, - * lrh.packet_length, grh.ip_version, grh.payload_length, - * grh.next_header, bth.opcode, bth.pad_count and - * bth.transport_header_version fields of a &struct ib_ud_header given - * the payload length and whether a GRH will be included. */ void ib_ud_header_init(int payload_bytes, + int lrh_present, + int eth_present, + int vlan_present, int grh_present, int immediate_present, struct ib_ud_header *header) { - u16 packet_length; - memset(header, 0, sizeof *header); - header->lrh.link_version = 0; - header->lrh.link_next_header = - grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; - packet_length = (IB_LRH_BYTES + - IB_BTH_BYTES + - IB_DETH_BYTES + - payload_bytes + - 4 + /* ICRC */ - 3) / 4; /* round up */ - - header->grh_present = grh_present; + if (lrh_present) { + u16 packet_length; + + header->lrh.link_version = 0; + header->lrh.link_next_header = + grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; + packet_length = (IB_LRH_BYTES + + IB_BTH_BYTES + + IB_DETH_BYTES + + (grh_present ? IB_GRH_BYTES : 0) + + payload_bytes + + 4 + /* ICRC */ + 3) / 4; /* round up */ + header->lrh.packet_length = cpu_to_be16(packet_length); + } + + if (vlan_present) + header->eth.type = cpu_to_be16(ETH_P_8021Q); + if (grh_present) { - packet_length += IB_GRH_BYTES / 4; header->grh.ip_version = 6; header->grh.payload_length = cpu_to_be16((IB_BTH_BYTES + @@ -222,19 +262,52 @@ void ib_ud_header_init(int payload_bytes, header->grh.next_header = 0x1b; } - header->lrh.packet_length = cpu_to_be16(packet_length); - - header->immediate_present = immediate_present; if (immediate_present) header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; else header->bth.opcode = IB_OPCODE_UD_SEND_ONLY; header->bth.pad_count = (4 - payload_bytes) & 3; header->bth.transport_header_version = 0; + + header->lrh_present = lrh_present; + header->eth_present = eth_present; + header->vlan_present = vlan_present; + header->grh_present = grh_present; + header->immediate_present = immediate_present; } EXPORT_SYMBOL(ib_ud_header_init); /** + * ib_lrh_header_pack - Pack LRH header struct into wire format + * @lrh:unpacked LRH header struct + * @buf:Buffer to pack into + * + * ib_lrh_header_pack() packs the LRH header structure @lrh into + * wire format in the buffer @buf. + */ +int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf) +{ + ib_pack(lrh_table, ARRAY_SIZE(lrh_table), lrh, buf); + return 0; +} +EXPORT_SYMBOL(ib_lrh_header_pack); + +/** + * ib_lrh_header_unpack - Unpack LRH structure from wire format + * @lrh:unpacked LRH header struct + * @buf:Buffer to pack into + * + * ib_lrh_header_unpack() unpacks the LRH header structure from + * wire format (in buf) into @lrh. + */ +int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh) +{ + ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), buf, lrh); + return 0; +} +EXPORT_SYMBOL(ib_lrh_header_unpack); + +/** * ib_ud_header_pack - Pack UD header struct into wire format * @header:UD header struct * @buf:Buffer to pack into @@ -247,10 +320,21 @@ int ib_ud_header_pack(struct ib_ud_header *header, { int len = 0; - ib_pack(lrh_table, ARRAY_SIZE(lrh_table), - &header->lrh, buf); - len += IB_LRH_BYTES; - + if (header->lrh_present) { + ib_pack(lrh_table, ARRAY_SIZE(lrh_table), + &header->lrh, buf + len); + len += IB_LRH_BYTES; + } + if (header->eth_present) { + ib_pack(eth_table, ARRAY_SIZE(eth_table), + &header->eth, buf + len); + len += IB_ETH_BYTES; + } + if (header->vlan_present) { + ib_pack(vlan_table, ARRAY_SIZE(vlan_table), + &header->vlan, buf + len); + len += IB_VLAN_BYTES; + } if (header->grh_present) { ib_pack(grh_table, ARRAY_SIZE(grh_table), &header->grh, buf + len); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 6babb72b39fc..cd1996d0ad08 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1022,7 +1022,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, port->ib_dev = device; port->port_num = port_num; - init_MUTEX(&port->sm_sem); + sema_init(&port->sm_sem, 1); mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); @@ -1085,7 +1085,6 @@ err_cdev: static void ib_umad_kill_port(struct ib_umad_port *port) { struct ib_umad_file *file; - int already_dead; int id; dev_set_drvdata(port->dev, NULL); @@ -1103,7 +1102,6 @@ static void ib_umad_kill_port(struct ib_umad_port *port) list_for_each_entry(file, &port->file_list, port_list) { mutex_lock(&file->mutex); - already_dead = file->agents_dead; file->agents_dead = 1; mutex_unlock(&file->mutex); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6fcfbeb24a23..b342248aec05 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -460,6 +460,8 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.active_width = attr.active_width; resp.active_speed = attr.active_speed; resp.phys_state = attr.phys_state; + resp.link_layer = rdma_port_get_link_layer(file->device->ib_dev, + cmd.port_num); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index a7da9be43e61..af7a8b08b2e9 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -94,6 +94,22 @@ rdma_node_get_transport(enum rdma_node_type node_type) } EXPORT_SYMBOL(rdma_node_get_transport); +enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num) +{ + if (device->get_link_layer) + return device->get_link_layer(device, port_num); + + switch (rdma_node_get_transport(device->node_type)) { + case RDMA_TRANSPORT_IB: + return IB_LINK_LAYER_INFINIBAND; + case RDMA_TRANSPORT_IWARP: + return IB_LINK_LAYER_ETHERNET; + default: + return IB_LINK_LAYER_UNSPECIFIED; + } +} +EXPORT_SYMBOL(rdma_port_get_link_layer); + /* Protection domains */ struct ib_pd *ib_alloc_pd(struct ib_device *device) @@ -310,8 +326,8 @@ EXPORT_SYMBOL(ib_create_qp); static const struct { int valid; - enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETY + 1]; - enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETY + 1]; + enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETHERTYPE + 1]; + enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETHERTYPE + 1]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild index 06964c4af849..950dfabcd89d 100644 --- a/drivers/infiniband/hw/amso1100/Kbuild +++ b/drivers/infiniband/hw/amso1100/Kbuild @@ -1,6 +1,4 @@ -ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c index 3b5095470cb3..0ebe4e806b86 100644 --- a/drivers/infiniband/hw/amso1100/c2_intr.c +++ b/drivers/infiniband/hw/amso1100/c2_intr.c @@ -62,8 +62,8 @@ void c2_rnic_interrupt(struct c2_dev *c2dev) static void handle_mq(struct c2_dev *c2dev, u32 mq_index) { if (c2dev->qptr_array[mq_index] == NULL) { - pr_debug(KERN_INFO "handle_mq: stray activity for mq_index=%d\n", - mq_index); + pr_debug("handle_mq: stray activity for mq_index=%d\n", + mq_index); return; } diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig index 2acec3fadf69..2b6352b85485 100644 --- a/drivers/infiniband/hw/cxgb3/Kconfig +++ b/drivers/infiniband/hw/cxgb3/Kconfig @@ -10,7 +10,7 @@ config INFINIBAND_CXGB3 our website at <http://www.chelsio.com>. For customer support, please visit our customer support page at - <http://www.chelsio.com/support.htm>. + <http://www.chelsio.com/support.html>. Please send feedback to <linux-bugs@chelsio.com>. diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile index 7e7b5a66f042..621619c794e5 100644 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ b/drivers/infiniband/hw/cxgb3/Makefile @@ -1,10 +1,8 @@ -EXTRA_CFLAGS += -Idrivers/net/cxgb3 +ccflags-y := -Idrivers/net/cxgb3 obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ iwch_provider.o iwch.o cxio_hal.o cxio_resource.o -ifdef CONFIG_INFINIBAND_CXGB3_DEBUG -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_INFINIBAND_CXGB3_DEBUG) += -DDEBUG diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 005b7b52bc1e..09dda0b8740e 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -160,6 +160,7 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) struct rdma_cq_setup setup; int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); + size += 1; /* one extra page for storing cq-in-err state */ cq->cqid = cxio_hal_get_cqid(rdev_p->rscp); if (!cq->cqid) return -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 8f0caf7d4482..78fbe9ffe7f0 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -53,7 +53,7 @@ #define T3_MAX_PBL_SIZE 256 #define T3_MAX_RQ_SIZE 1024 #define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) -#define T3_MAX_CQ_DEPTH 262144 +#define T3_MAX_CQ_DEPTH 65536 #define T3_MAX_NUM_STAG (1<<15) #define T3_MAX_MR_SIZE 0x100000000ULL #define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index e5ddb63e7d23..4bb997aa39d0 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -728,6 +728,22 @@ struct t3_cq { #define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \ CQE_GENBIT(*cqe)) +struct t3_cq_status_page { + u32 cq_err; +}; + +static inline int cxio_cq_in_error(struct t3_cq *cq) +{ + return ((struct t3_cq_status_page *) + &cq->queue[1 << cq->size_log2])->cq_err; +} + +static inline void cxio_set_cq_in_error(struct t3_cq *cq) +{ + ((struct t3_cq_status_page *) + &cq->queue[1 << cq->size_log2])->cq_err = 1; +} + static inline void cxio_set_wq_in_error(struct t3_wq *wq) { wq->queue->wq_in_err.err |= 1; diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index ebfb117ba68b..d02dcc6e5963 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -137,7 +137,7 @@ static void stop_ep_timer(struct iwch_ep *ep) put_ep(&ep->com); } -int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e) +static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e) { int error = 0; struct cxio_rdev *rdev; @@ -463,7 +463,8 @@ static int send_connect(struct iwch_ep *ep) V_MSS_IDX(mtu_idx) | V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor); + opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | + V_CONG_CONTROL_FLAVOR(cong_flavor); skb->priority = CPL_PRIORITY_SETUP; set_arp_failure_handler(skb, act_open_req_arp_failure); @@ -1092,8 +1093,8 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) PDBG("%s ep %p credits %u\n", __func__, ep, credits); if (credits == 0) { - PDBG(KERN_ERR "%s 0 credit ack ep %p state %u\n", - __func__, ep, state_read(&ep->com)); + PDBG("%s 0 credit ack ep %p state %u\n", + __func__, ep, state_read(&ep->com)); return CPL_RET_BUF_DONE; } @@ -1280,7 +1281,8 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) V_MSS_IDX(mtu_idx) | V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor); + opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | + V_CONG_CONTROL_FLAVOR(cong_flavor); rpl = cplhdr(skb); rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); @@ -1364,7 +1366,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) __func__); goto reject; } - dst = &rt->u.dst; + dst = &rt->dst; l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", @@ -1932,7 +1934,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -EHOSTUNREACH; goto fail3; } - ep->dst = &rt->u.dst; + ep->dst = &rt->dst; /* get a l2t entry */ ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour, diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c index 6afc89e7572c..71e0d845da3d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c @@ -76,6 +76,14 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, atomic_inc(&qhp->refcnt); spin_unlock(&rnicp->lock); + if (qhp->attr.state == IWCH_QP_STATE_RTS) { + attrs.next_state = IWCH_QP_STATE_TERMINATE; + iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (send_term) + iwch_post_terminate(qhp, rsp_msg); + } + event.event = ib_event; event.device = chp->ibcq.device; if (ib_event == IB_EVENT_CQ_ERR) @@ -86,13 +94,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, if (qhp->ibqp.event_handler) (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); - if (qhp->attr.state == IWCH_QP_STATE_RTS) { - attrs.next_state = IWCH_QP_STATE_TERMINATE; - iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (send_term) - iwch_post_terminate(qhp, rsp_msg); - } + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); if (atomic_dec_and_test(&qhp->refcnt)) wake_up(&qhp->wait); @@ -179,7 +181,6 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) case TPT_ERR_BOUND: case TPT_ERR_INVALIDATE_SHARED_MR: case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); break; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index fca0b4b747e4..2e2741307af4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -154,6 +154,8 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve struct iwch_create_cq_resp uresp; struct iwch_create_cq_req ureq; struct iwch_ucontext *ucontext = NULL; + static int warned; + size_t resplen; PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); rhp = to_iwch_dev(ibdev); @@ -217,15 +219,26 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve uresp.key = ucontext->key; ucontext->key += PAGE_SIZE; spin_unlock(&ucontext->mmap_lock); - if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { + mm->key = uresp.key; + mm->addr = virt_to_phys(chp->cq.queue); + if (udata->outlen < sizeof uresp) { + if (!warned++) + printk(KERN_WARNING MOD "Warning - " + "downlevel libcxgb3 (non-fatal).\n"); + mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * + sizeof(struct t3_cqe)); + resplen = sizeof(struct iwch_create_cq_resp_v0); + } else { + mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) * + sizeof(struct t3_cqe)); + uresp.memsize = mm->len; + resplen = sizeof uresp; + } + if (ib_copy_to_udata(udata, &uresp, resplen)) { kfree(mm); iwch_destroy_cq(&chp->ibcq); return ERR_PTR(-EFAULT); } - mm->key = uresp.key; - mm->addr = virt_to_phys(chp->cq.queue); - mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * - sizeof (struct t3_cqe)); insert_mmap(ucontext, mm); } PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n", @@ -1414,6 +1427,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; dev->ibdev.get_protocol_stats = iwch_get_mib; + dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index ae47bfd22bd5..0993137181d7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -371,7 +371,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - if (num_wrs <= 0) { + if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); err = -ENOMEM; goto out; @@ -554,7 +554,7 @@ int iwch_bind_mw(struct ib_qp *qp, } num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - if ((num_wrs) <= 0) { + if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); return -ENOMEM; } @@ -802,21 +802,19 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) /* * Assumes qhp lock is held. */ -static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) +static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, + struct iwch_cq *schp, unsigned long *flag) { - struct iwch_cq *rchp, *schp; int count; int flushed; - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); /* take a ref on the qhp since we must release the lock */ atomic_inc(&qhp->refcnt); spin_unlock_irqrestore(&qhp->lock, *flag); - /* locking heirarchy: cq lock first, then qp lock. */ + /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&rchp->lock, *flag); spin_lock(&qhp->lock); cxio_flush_hw_cq(&rchp->cq); @@ -827,7 +825,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) if (flushed) (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - /* locking heirarchy: cq lock first, then qp lock. */ + /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, *flag); spin_lock(&qhp->lock); cxio_flush_hw_cq(&schp->cq); @@ -847,10 +845,23 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) { - if (qhp->ibqp.uobject) + struct iwch_cq *rchp, *schp; + + rchp = get_chp(qhp->rhp, qhp->attr.rcq); + schp = get_chp(qhp->rhp, qhp->attr.scq); + + if (qhp->ibqp.uobject) { cxio_set_wq_in_error(&qhp->wq); - else - __flush_qp(qhp, flag); + cxio_set_cq_in_error(&rchp->cq); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + if (schp != rchp) { + cxio_set_cq_in_error(&schp->cq); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + } + return; + } + __flush_qp(qhp, rchp, schp, flag); } diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h index cb7086f558c1..a277c31fcaf7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_user.h +++ b/drivers/infiniband/hw/cxgb3/iwch_user.h @@ -45,10 +45,18 @@ struct iwch_create_cq_req { __u64 user_rptr_addr; }; +struct iwch_create_cq_resp_v0 { + __u64 key; + __u32 cqid; + __u32 size_log2; +}; + struct iwch_create_cq_resp { __u64 key; __u32 cqid; __u32 size_log2; + __u32 memsize; + __u32 reserved; }; struct iwch_create_qp_resp { diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index ccb85eaaad75..6b7e6c543534 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -10,7 +10,7 @@ config INFINIBAND_CXGB4 our website at <http://www.chelsio.com>. For customer support, please visit our customer support page at - <http://www.chelsio.com/support.htm>. + <http://www.chelsio.com/support.html>. Please send feedback to <linux-bugs@chelsio.com>. diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index e31a499f0172..cd20b1342aec 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -1,4 +1,4 @@ -EXTRA_CFLAGS += -Idrivers/net/cxgb4 +ccflags-y := -Idrivers/net/cxgb4 obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 855ee44fdb52..0dc62b1438be 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -61,6 +61,10 @@ static char *states[] = { NULL, }; +static int dack_mode; +module_param(dack_mode, int, 0644); +MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)"); + int c4iw_max_read_depth = 8; module_param(c4iw_max_read_depth, int, 0644); MODULE_PARM_DESC(c4iw_max_read_depth, "Per-connection max ORD/IRD (default=8)"); @@ -113,9 +117,9 @@ static int rcv_win = 256 * 1024; module_param(rcv_win, int, 0644); MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)"); -static int snd_win = 32 * 1024; +static int snd_win = 128 * 1024; module_param(snd_win, int, 0644); -MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)"); +MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)"); static struct workqueue_struct *workq; @@ -168,7 +172,7 @@ static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e); if (error < 0) kfree_skb(skb); - return error; + return error < 0 ? error : 0; } int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) @@ -183,7 +187,7 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) error = cxgb4_ofld_send(rdev->lldi.ports[0], skb); if (error < 0) kfree_skb(skb); - return error; + return error < 0 ? error : 0; } static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) @@ -215,12 +219,11 @@ static void set_emss(struct c4iw_ep *ep, u16 opt) static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc) { - unsigned long flags; enum c4iw_ep_state state; - spin_lock_irqsave(&epc->lock, flags); + mutex_lock(&epc->mutex); state = epc->state; - spin_unlock_irqrestore(&epc->lock, flags); + mutex_unlock(&epc->mutex); return state; } @@ -231,12 +234,10 @@ static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) { - unsigned long flags; - - spin_lock_irqsave(&epc->lock, flags); + mutex_lock(&epc->mutex); PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]); __state_set(epc, new); - spin_unlock_irqrestore(&epc->lock, flags); + mutex_unlock(&epc->mutex); return; } @@ -247,8 +248,8 @@ static void *alloc_ep(int size, gfp_t gfp) epc = kzalloc(size, gfp); if (epc) { kref_init(&epc->kref); - spin_lock_init(&epc->lock); - init_waitqueue_head(&epc->waitq); + mutex_init(&epc->mutex); + c4iw_init_wr_wait(&epc->wr_wait); } PDBG("%s alloc ep %p\n", __func__, epc); return epc; @@ -469,11 +470,12 @@ static int send_connect(struct c4iw_ep *ep) __func__); return -ENOMEM; } - set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->txq_idx); + set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); opt0 = KEEP_ALIVE(1) | + DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | L2T_IDX(ep->l2t->idx) | @@ -780,11 +782,11 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); } - if (ep->com.cm_id) { - PDBG("%s ep %p tid %u status %d\n", __func__, ep, - ep->hwtid, status); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } + + PDBG("%s ep %p tid %u status %d\n", __func__, ep, + ep->hwtid, status); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + if (status < 0) { ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; @@ -845,8 +847,10 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) INIT_TP_WR(req, ep->hwtid); OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid)); - req->credit_dack = cpu_to_be32(credits); - set_wr_txq(skb, CPL_PRIORITY_ACK, ep->txq_idx); + req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK(1) | + F_RX_DACK_CHANGE | + V_RX_DACK_MODE(dack_mode)); + set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx); c4iw_ofld_send(&ep->com.dev->rdev, skb); return credits; } @@ -1124,7 +1128,6 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; struct cpl_abort_rpl_rss *rpl = cplhdr(skb); - unsigned long flags; int release = 0; unsigned int tid = GET_TID(rpl); struct tid_info *t = dev->rdev.lldi.tids; @@ -1132,7 +1135,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); BUG_ON(!ep); - spin_lock_irqsave(&ep->com.lock, flags); + mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: __state_set(&ep->com, DEAD); @@ -1143,7 +1146,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) __func__, ep, ep->com.state); break; } - spin_unlock_irqrestore(&ep->com.lock, flags); + mutex_unlock(&ep->com.mutex); if (release) release_ep_resources(ep); @@ -1206,9 +1209,9 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) } PDBG("%s ep %p status %d error %d\n", __func__, ep, rpl->status, status2errno(rpl->status)); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); + ep->com.wr_wait.ret = status2errno(rpl->status); + ep->com.wr_wait.done = 1; + wake_up(&ep->com.wr_wait.wait); return 0; } @@ -1242,9 +1245,9 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_listen_ep *ep = lookup_stid(t, stid); PDBG("%s ep %p\n", __func__, ep); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); + ep->com.wr_wait.ret = status2errno(rpl->status); + ep->com.wr_wait.done = 1; + wake_up(&ep->com.wr_wait.wait); return 0; } @@ -1264,6 +1267,7 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); opt0 = KEEP_ALIVE(1) | + DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | L2T_IDX(ep->l2t->idx) | @@ -1287,7 +1291,7 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, ep->hwtid)); rpl->opt0 = cpu_to_be64(opt0); rpl->opt2 = cpu_to_be32(opt2); - set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->txq_idx); + set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); return; @@ -1344,7 +1348,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) u16 rss_qid; u32 mtu; int step; - int txq_idx; + int txq_idx, ctrlq_idx; parent_ep = lookup_stid(t, stid); PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); @@ -1365,7 +1369,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) __func__); goto reject; } - dst = &rt->u.dst; + dst = &rt->dst; if (dst->neighbour->dev->flags & IFF_LOOPBACK) { pdev = ip_dev_find(&init_net, peer_ip); BUG_ON(!pdev); @@ -1376,6 +1380,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan; txq_idx = cxgb4_port_idx(pdev) * step; + ctrlq_idx = cxgb4_port_idx(pdev); step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[cxgb4_port_idx(pdev) * step]; dev_put(pdev); @@ -1387,6 +1392,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) smac_idx = (cxgb4_port_viid(dst->neighbour->dev) & 0x7F) << 1; step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan; txq_idx = cxgb4_port_idx(dst->neighbour->dev) * step; + ctrlq_idx = cxgb4_port_idx(dst->neighbour->dev); step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[ cxgb4_port_idx(dst->neighbour->dev) * step]; @@ -1426,6 +1432,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) child_ep->rss_qid = rss_qid; child_ep->mtu = mtu; child_ep->txq_idx = txq_idx; + child_ep->ctrlq_idx = ctrlq_idx; PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__, tx_chan, smac_idx, rss_qid); @@ -1467,20 +1474,17 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_peer_close *hdr = cplhdr(skb); struct c4iw_ep *ep; struct c4iw_qp_attributes attrs; - unsigned long flags; int disconnect = 1; int release = 0; int closing = 0; struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(hdr); - int start_timer = 0; - int stop_timer = 0; ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); dst_confirm(ep->dst); - spin_lock_irqsave(&ep->com.lock, flags); + mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: __state_set(&ep->com, CLOSING); @@ -1498,20 +1502,20 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) * in rdma connection migration (see c4iw_accept_cr()). */ __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; + ep->com.wr_wait.done = 1; + ep->com.wr_wait.ret = -ECONNRESET; PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); - wake_up(&ep->com.waitq); + wake_up(&ep->com.wr_wait.wait); break; case MPA_REP_SENT: __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; + ep->com.wr_wait.done = 1; + ep->com.wr_wait.ret = -ECONNRESET; PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); - wake_up(&ep->com.waitq); + wake_up(&ep->com.wr_wait.wait); break; case FPDU_MODE: - start_timer = 1; + start_ep_timer(ep); __state_set(&ep->com, CLOSING); closing = 1; peer_close_upcall(ep); @@ -1524,7 +1528,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) disconnect = 0; break; case MORIBUND: - stop_timer = 1; + stop_ep_timer(ep); if (ep->com.cm_id && ep->com.qp) { attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, @@ -1541,16 +1545,12 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) default: BUG_ON(1); } - spin_unlock_irqrestore(&ep->com.lock, flags); + mutex_unlock(&ep->com.mutex); if (closing) { attrs.next_state = C4IW_QP_STATE_CLOSING; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - if (start_timer) - start_ep_timer(ep); - if (stop_timer) - stop_ep_timer(ep); if (disconnect) c4iw_ep_disconnect(ep, 0, GFP_KERNEL); if (release) @@ -1576,10 +1576,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_qp_attributes attrs; int ret; int release = 0; - unsigned long flags; struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(req); - int stop_timer = 0; ep = lookup_tid(t, tid); if (is_neg_adv_abort(req->status)) { @@ -1587,41 +1585,34 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) ep->hwtid); return 0; } - spin_lock_irqsave(&ep->com.lock, flags); PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); + + /* + * Wake up any threads in rdma_init() or rdma_fini(). + */ + ep->com.wr_wait.done = 1; + ep->com.wr_wait.ret = -ECONNRESET; + wake_up(&ep->com.wr_wait.wait); + + mutex_lock(&ep->com.mutex); switch (ep->com.state) { case CONNECTING: break; case MPA_REQ_WAIT: - stop_timer = 1; + stop_ep_timer(ep); break; case MPA_REQ_SENT: - stop_timer = 1; + stop_ep_timer(ep); connect_reply_upcall(ep, -ECONNRESET); break; case MPA_REP_SENT: - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - PDBG("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); break; case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see c4iw_accept_cr()). - */ - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); - wake_up(&ep->com.waitq); break; case MORIBUND: case CLOSING: - stop_timer = 1; + stop_ep_timer(ep); /*FALLTHROUGH*/ case FPDU_MODE: if (ep->com.cm_id && ep->com.qp) { @@ -1640,7 +1631,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) break; case DEAD: PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); - spin_unlock_irqrestore(&ep->com.lock, flags); + mutex_unlock(&ep->com.mutex); return 0; default: BUG_ON(1); @@ -1651,7 +1642,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) __state_set(&ep->com, DEAD); release = 1; } - spin_unlock_irqrestore(&ep->com.lock, flags); + mutex_unlock(&ep->com.mutex); rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); if (!rpl_skb) { @@ -1667,8 +1658,6 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) rpl->cmd = CPL_ABORT_NO_RST; c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); out: - if (stop_timer) - stop_ep_timer(ep); if (release) release_ep_resources(ep); return 0; @@ -1679,11 +1668,9 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_ep *ep; struct c4iw_qp_attributes attrs; struct cpl_close_con_rpl *rpl = cplhdr(skb); - unsigned long flags; int release = 0; struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(rpl); - int stop_timer = 0; ep = lookup_tid(t, tid); @@ -1691,13 +1678,13 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) BUG_ON(!ep); /* The cm_id may be null if we failed to connect */ - spin_lock_irqsave(&ep->com.lock, flags); + mutex_lock(&ep->com.mutex); switch (ep->com.state) { case CLOSING: __state_set(&ep->com, MORIBUND); break; case MORIBUND: - stop_timer = 1; + stop_ep_timer(ep); if ((ep->com.cm_id) && (ep->com.qp)) { attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.qp->rhp, @@ -1716,9 +1703,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) BUG_ON(1); break; } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (stop_timer) - stop_ep_timer(ep); + mutex_unlock(&ep->com.mutex); if (release) release_ep_resources(ep); return 0; @@ -1726,23 +1711,24 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) { - struct c4iw_ep *ep; - struct cpl_rdma_terminate *term = cplhdr(skb); + struct cpl_rdma_terminate *rpl = cplhdr(skb); struct tid_info *t = dev->rdev.lldi.tids; - unsigned int tid = GET_TID(term); + unsigned int tid = GET_TID(rpl); + struct c4iw_ep *ep; + struct c4iw_qp_attributes attrs; ep = lookup_tid(t, tid); + BUG_ON(!ep); - if (state_read(&ep->com) != FPDU_MODE) - return 0; + if (ep->com.qp) { + printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid, + ep->com.qp->wq.sq.qid); + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } else + printk(KERN_WARNING MOD "TERM received tid %u no qp\n", tid); - PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - skb_pull(skb, sizeof *term); - PDBG("%s saving %d bytes of term msg\n", __func__, skb->len); - skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, - skb->len); - ep->com.qp->attr.terminate_msg_len = skb->len; - ep->com.qp->attr.is_terminate_local = 0; return 0; } @@ -1763,8 +1749,8 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); if (credits == 0) { - PDBG(KERN_ERR "%s 0 credit ack ep %p tid %u state %u\n", - __func__, ep, ep->hwtid, state_read(&ep->com)); + PDBG("%s 0 credit ack ep %p tid %u state %u\n", + __func__, ep, ep->hwtid, state_read(&ep->com)); return 0; } @@ -1939,7 +1925,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -EHOSTUNREACH; goto fail3; } - ep->dst = &rt->u.dst; + ep->dst = &rt->dst; /* get a l2t entry */ if (ep->dst->neighbour->dev->flags & IFF_LOOPBACK) { @@ -1957,6 +1943,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->txq_idx = cxgb4_port_idx(pdev) * step; step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan; + ep->ctrlq_idx = cxgb4_port_idx(pdev); ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ cxgb4_port_idx(pdev) * step]; dev_put(pdev); @@ -1971,6 +1958,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(ep->dst->neighbour->dev) * step; + ep->ctrlq_idx = cxgb4_port_idx(ep->dst->neighbour->dev); step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan; ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ @@ -2041,6 +2029,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) } state_set(&ep->com, LISTEN); + c4iw_init_wr_wait(&ep->com.wr_wait); err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid, ep->com.local_addr.sin_addr.s_addr, ep->com.local_addr.sin_port, @@ -2049,8 +2038,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) goto fail3; /* wait for pass_open_rpl */ - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, + __func__); if (!err) { cm_id->provider_data = ep; goto out; @@ -2074,15 +2063,14 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) might_sleep(); state_set(&ep->com, DEAD); - ep->com.rpl_done = 0; - ep->com.rpl_err = 0; + c4iw_init_wr_wait(&ep->com.wr_wait); err = listen_stop(ep); if (err) goto done; - wait_event(ep->com.waitq, ep->com.rpl_done); + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, + __func__); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); done: - err = ep->com.rpl_err; cm_id->rem_ref(cm_id); c4iw_put_ep(&ep->com); return err; @@ -2091,14 +2079,11 @@ done: int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) { int ret = 0; - unsigned long flags; int close = 0; int fatal = 0; struct c4iw_rdev *rdev; - int start_timer = 0; - int stop_timer = 0; - spin_lock_irqsave(&ep->com.lock, flags); + mutex_lock(&ep->com.mutex); PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, states[ep->com.state], abrupt); @@ -2120,7 +2105,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) ep->com.state = ABORTING; else { ep->com.state = CLOSING; - start_timer = 1; + start_ep_timer(ep); } set_bit(CLOSE_SENT, &ep->com.flags); break; @@ -2128,7 +2113,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { close = 1; if (abrupt) { - stop_timer = 1; + stop_ep_timer(ep); ep->com.state = ABORTING; } else ep->com.state = MORIBUND; @@ -2145,11 +2130,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) break; } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (start_timer) - start_ep_timer(ep); - if (stop_timer) - stop_ep_timer(ep); + mutex_unlock(&ep->com.mutex); if (close) { if (abrupt) ret = abort_connection(ep, NULL, gfp); @@ -2163,6 +2144,13 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) return ret; } +static int async_event(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_fw6_msg *rpl = cplhdr(skb); + c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + return 0; +} + /* * These are the real handlers that are called from a * work queue. @@ -2181,7 +2169,8 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_ABORT_REQ_RSS] = peer_abort, [CPL_CLOSE_CON_RPL] = close_con_rpl, [CPL_RDMA_TERMINATE] = terminate, - [CPL_FW4_ACK] = fw4_ack + [CPL_FW4_ACK] = fw4_ack, + [CPL_FW6_MSG] = async_event }; static void process_timeout(struct c4iw_ep *ep) @@ -2189,7 +2178,7 @@ static void process_timeout(struct c4iw_ep *ep) struct c4iw_qp_attributes attrs; int abort = 1; - spin_lock_irq(&ep->com.lock); + mutex_lock(&ep->com.mutex); PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, ep->com.state); switch (ep->com.state) { @@ -2216,7 +2205,7 @@ static void process_timeout(struct c4iw_ep *ep) WARN_ON(1); abort = 0; } - spin_unlock_irq(&ep->com.lock); + mutex_unlock(&ep->com.mutex); if (abort) abort_connection(ep, NULL, GFP_KERNEL); c4iw_put_ep(&ep->com); @@ -2244,7 +2233,7 @@ static void process_work(struct work_struct *work) { struct sk_buff *skb = NULL; struct c4iw_dev *dev; - struct cpl_act_establish *rpl = cplhdr(skb); + struct cpl_act_establish *rpl; unsigned int opcode; int ret; @@ -2300,6 +2289,7 @@ static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u " "for tid %u\n", rpl->status, GET_TID(rpl)); } + kfree_skb(skb); return 0; } @@ -2314,20 +2304,25 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) switch (rpl->type) { case 1: ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); - wr_waitp = (__force struct c4iw_wr_wait *)rpl->data[1]; + wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); if (wr_waitp) { - wr_waitp->ret = ret; + if (ret) + wr_waitp->ret = -ret; + else + wr_waitp->ret = 0; wr_waitp->done = 1; wake_up(&wr_waitp->wait); } + kfree_skb(skb); break; case 2: - c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + sched(dev, skb); break; default: printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__, rpl->type); + kfree_skb(skb); break; } return 0; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index fac5c6e68011..8d8f8add6fcd 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -43,7 +43,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, int ret; wr_len = sizeof *res_wr + sizeof *res; - skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) return -ENOMEM; set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); @@ -55,7 +55,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, V_FW_RI_RES_WR_NRES(1) | FW_WR_COMPL(1)); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (u64)&wr_wait; + res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_RESET; @@ -64,14 +64,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, c4iw_init_wr_wait(&wr_wait); ret = c4iw_ofld_send(rdev, skb); if (!ret) { - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rdev->lldi.pdev)); - rdev->flags = T4_FATAL_ERROR; - ret = -EIO; - } else - ret = wr_wait.ret; + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); } kfree(cq->sw_queue); @@ -118,7 +111,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + sizeof *res; - skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) { ret = -ENOMEM; goto err4; @@ -132,7 +125,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, V_FW_RI_RES_WR_NRES(1) | FW_WR_COMPL(1)); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (u64)&wr_wait; + res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_WRITE; @@ -157,14 +150,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, if (ret) goto err4; PDBG("%s wait_event wr_wait %p\n", __func__, &wr_wait); - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rdev->lldi.pdev)); - rdev->flags = T4_FATAL_ERROR; - ret = -EIO; - } else - ret = wr_wait.ret; + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); if (ret) goto err4; @@ -476,6 +462,11 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, goto proc_cqe; } + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { + ret = -EAGAIN; + goto skip_cqe; + } + /* * RECV completion. */ @@ -696,6 +687,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) case T4_ERR_MSN_RANGE: case T4_ERR_IRD_OVERFLOW: case T4_ERR_OPCODE: + case T4_ERR_INTERNAL_ERR: wc->status = IB_WC_FATAL_ERR; break; case T4_ERR_SWFLUSH: diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index d870f9c17c1e..54fbc1118abe 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -49,29 +49,33 @@ static DEFINE_MUTEX(dev_mutex); static struct dentry *c4iw_debugfs_root; -struct debugfs_qp_data { +struct c4iw_debugfs_data { struct c4iw_dev *devp; char *buf; int bufsize; int pos; }; -static int count_qps(int id, void *p, void *data) +static int count_idrs(int id, void *p, void *data) { - struct c4iw_qp *qp = p; int *countp = data; - if (id != qp->wq.sq.qid) - return 0; - *countp = *countp + 1; return 0; } -static int dump_qps(int id, void *p, void *data) +static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct c4iw_debugfs_data *d = file->private_data; + + return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos); +} + +static int dump_qp(int id, void *p, void *data) { struct c4iw_qp *qp = p; - struct debugfs_qp_data *qpd = data; + struct c4iw_debugfs_data *qpd = data; int space; int cc; @@ -101,7 +105,7 @@ static int dump_qps(int id, void *p, void *data) static int qp_release(struct inode *inode, struct file *file) { - struct debugfs_qp_data *qpd = file->private_data; + struct c4iw_debugfs_data *qpd = file->private_data; if (!qpd) { printk(KERN_INFO "%s null qpd?\n", __func__); return 0; @@ -113,7 +117,7 @@ static int qp_release(struct inode *inode, struct file *file) static int qp_open(struct inode *inode, struct file *file) { - struct debugfs_qp_data *qpd; + struct c4iw_debugfs_data *qpd; int ret = 0; int count = 1; @@ -126,7 +130,7 @@ static int qp_open(struct inode *inode, struct file *file) qpd->pos = 0; spin_lock_irq(&qpd->devp->lock); - idr_for_each(&qpd->devp->qpidr, count_qps, &count); + idr_for_each(&qpd->devp->qpidr, count_idrs, &count); spin_unlock_irq(&qpd->devp->lock); qpd->bufsize = count * 128; @@ -137,7 +141,7 @@ static int qp_open(struct inode *inode, struct file *file) } spin_lock_irq(&qpd->devp->lock); - idr_for_each(&qpd->devp->qpidr, dump_qps, qpd); + idr_for_each(&qpd->devp->qpidr, dump_qp, qpd); spin_unlock_irq(&qpd->devp->lock); qpd->buf[qpd->pos++] = 0; @@ -149,43 +153,86 @@ out: return ret; } -static ssize_t qp_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static const struct file_operations qp_debugfs_fops = { + .owner = THIS_MODULE, + .open = qp_open, + .release = qp_release, + .read = debugfs_read, + .llseek = default_llseek, +}; + +static int dump_stag(int id, void *p, void *data) { - struct debugfs_qp_data *qpd = file->private_data; - loff_t pos = *ppos; - loff_t avail = qpd->pos; + struct c4iw_debugfs_data *stagd = data; + int space; + int cc; - if (pos < 0) - return -EINVAL; - if (pos >= avail) + space = stagd->bufsize - stagd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(stagd->buf + stagd->pos, space, "0x%x\n", id<<8); + if (cc < space) + stagd->pos += cc; + return 0; +} + +static int stag_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *stagd = file->private_data; + if (!stagd) { + printk(KERN_INFO "%s null stagd?\n", __func__); return 0; - if (count > avail - pos) - count = avail - pos; + } + kfree(stagd->buf); + kfree(stagd); + return 0; +} - while (count) { - size_t len = 0; +static int stag_open(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *stagd; + int ret = 0; + int count = 1; - len = min((int)count, (int)qpd->pos - (int)pos); - if (copy_to_user(buf, qpd->buf + pos, len)) - return -EFAULT; - if (len == 0) - return -EINVAL; + stagd = kmalloc(sizeof *stagd, GFP_KERNEL); + if (!stagd) { + ret = -ENOMEM; + goto out; + } + stagd->devp = inode->i_private; + stagd->pos = 0; + + spin_lock_irq(&stagd->devp->lock); + idr_for_each(&stagd->devp->mmidr, count_idrs, &count); + spin_unlock_irq(&stagd->devp->lock); - buf += len; - pos += len; - count -= len; + stagd->bufsize = count * sizeof("0x12345678\n"); + stagd->buf = kmalloc(stagd->bufsize, GFP_KERNEL); + if (!stagd->buf) { + ret = -ENOMEM; + goto err1; } - count = pos - *ppos; - *ppos = pos; - return count; + + spin_lock_irq(&stagd->devp->lock); + idr_for_each(&stagd->devp->mmidr, dump_stag, stagd); + spin_unlock_irq(&stagd->devp->lock); + + stagd->buf[stagd->pos++] = 0; + file->private_data = stagd; + goto out; +err1: + kfree(stagd); +out: + return ret; } -static const struct file_operations qp_debugfs_fops = { +static const struct file_operations stag_debugfs_fops = { .owner = THIS_MODULE, - .open = qp_open, - .release = qp_release, - .read = qp_read, + .open = stag_open, + .release = stag_release, + .read = debugfs_read, + .llseek = default_llseek, }; static int setup_debugfs(struct c4iw_dev *devp) @@ -199,6 +246,11 @@ static int setup_debugfs(struct c4iw_dev *devp) (void *)devp, &qp_debugfs_fops); if (de && de->d_inode) de->d_inode->i_size = 4096; + + de = debugfs_create_file("stags", S_IWUSR, devp->debugfs_root, + (void *)devp, &stag_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; return 0; } @@ -250,12 +302,17 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density); rdev->cqmask = rdev->lldi.ucq_density - 1; PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d " - "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x\n", + "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x " + "qp qid start %u size %u cq qid start %u size %u\n", __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start, rdev->lldi.vr->stag.size, c4iw_num_stags(rdev), rdev->lldi.vr->pbl.start, rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start, - rdev->lldi.vr->rq.size); + rdev->lldi.vr->rq.size, + rdev->lldi.vr->qp.start, + rdev->lldi.vr->qp.size, + rdev->lldi.vr->cq.start, + rdev->lldi.vr->cq.size); PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu " "qpmask 0x%x cqshift %lu cqmask 0x%x\n", (unsigned)pci_resource_len(rdev->lldi.pdev, 2), @@ -285,7 +342,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) printk(KERN_ERR MOD "error %d initializing rqt pool\n", err); goto err3; } + err = c4iw_ocqp_pool_create(rdev); + if (err) { + printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err); + goto err4; + } return 0; +err4: + c4iw_rqtpool_destroy(rdev); err3: c4iw_pblpool_destroy(rdev); err2: @@ -312,6 +376,7 @@ static void c4iw_remove(struct c4iw_dev *dev) idr_destroy(&dev->cqidr); idr_destroy(&dev->qpidr); idr_destroy(&dev->mmidr); + iounmap(dev->rdev.oc_mw_kva); ib_dealloc_device(&dev->ibdev); } @@ -327,6 +392,17 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) } devp->rdev.lldi = *infop; + devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) + + (pci_resource_len(devp->rdev.lldi.pdev, 2) - + roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size)); + devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, + devp->rdev.lldi.vr->ocq.size); + + printk(KERN_INFO MOD "ocq memory: " + "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n", + devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size, + devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva); + mutex_lock(&dev_mutex); ret = c4iw_rdev_open(&devp->rdev); @@ -378,46 +454,6 @@ out: return dev; } -static struct sk_buff *t4_pktgl_to_skb(const struct pkt_gl *gl, - unsigned int skb_len, - unsigned int pull_len) -{ - struct sk_buff *skb; - struct skb_shared_info *ssi; - - if (gl->tot_len <= 512) { - skb = alloc_skb(gl->tot_len, GFP_ATOMIC); - if (unlikely(!skb)) - goto out; - __skb_put(skb, gl->tot_len); - skb_copy_to_linear_data(skb, gl->va, gl->tot_len); - } else { - skb = alloc_skb(skb_len, GFP_ATOMIC); - if (unlikely(!skb)) - goto out; - __skb_put(skb, pull_len); - skb_copy_to_linear_data(skb, gl->va, pull_len); - - ssi = skb_shinfo(skb); - ssi->frags[0].page = gl->frags[0].page; - ssi->frags[0].page_offset = gl->frags[0].page_offset + pull_len; - ssi->frags[0].size = gl->frags[0].size - pull_len; - if (gl->nfrags > 1) - memcpy(&ssi->frags[1], &gl->frags[1], - (gl->nfrags - 1) * sizeof(skb_frag_t)); - ssi->nr_frags = gl->nfrags; - - skb->len = gl->tot_len; - skb->data_len = skb->len - pull_len; - skb->truesize += skb->data_len; - - /* Get a reference for the last page, we don't own it */ - get_page(gl->frags[gl->nfrags - 1].page); - } -out: - return skb; -} - static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, const struct pkt_gl *gl) { @@ -442,7 +478,7 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, c4iw_ev_handler(dev, qid); return 0; } else { - skb = t4_pktgl_to_skb(gl, 128, 128); + skb = cxgb4_pktgl_to_skb(gl, 128, 128); if (unlikely(!skb)) goto nomem; } diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 491e76a0327f..c13041a0aeba 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -60,7 +60,7 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, if (qhp->attr.state == C4IW_QP_STATE_RTS) { attrs.next_state = C4IW_QP_STATE_TERMINATE; c4iw_modify_qp(qhp->rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, - &attrs, 1); + &attrs, 0); } event.event = ib_event; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index d33e1a668811..16032cdb4337 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -46,6 +46,7 @@ #include <linux/timer.h> #include <linux/io.h> #include <linux/kfifo.h> +#include <linux/mutex.h> #include <asm/byteorder.h> @@ -79,21 +80,6 @@ static inline void *cplhdr(struct sk_buff *skb) return skb->data; } -#define C4IW_WR_TO (10*HZ) - -struct c4iw_wr_wait { - wait_queue_head_t wait; - int done; - int ret; -}; - -static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) -{ - wr_waitp->ret = 0; - wr_waitp->done = 0; - init_waitqueue_head(&wr_waitp->wait); -} - struct c4iw_resource { struct kfifo tpt_fifo; spinlock_t tpt_fifo_lock; @@ -127,8 +113,11 @@ struct c4iw_rdev { struct c4iw_dev_ucontext uctx; struct gen_pool *pbl_pool; struct gen_pool *rqt_pool; + struct gen_pool *ocqp_pool; u32 flags; struct cxgb4_lld_info lldi; + unsigned long oc_mw_pa; + void __iomem *oc_mw_kva; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -141,6 +130,44 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev) return min((int)T4_MAX_NUM_STAG, (int)(rdev->lldi.vr->stag.size >> 5)); } +#define C4IW_WR_TO (10*HZ) + +struct c4iw_wr_wait { + wait_queue_head_t wait; + int done; + int ret; +}; + +static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) +{ + wr_waitp->ret = 0; + wr_waitp->done = 0; + init_waitqueue_head(&wr_waitp->wait); +} + +static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, + struct c4iw_wr_wait *wr_waitp, + u32 hwtid, u32 qpid, + const char *func) +{ + unsigned to = C4IW_WR_TO; + do { + + wait_event_timeout(wr_waitp->wait, wr_waitp->done, to); + if (!wr_waitp->done) { + printk(KERN_ERR MOD "%s - Device %s not responding - " + "tid %u qpid %u\n", func, + pci_name(rdev->lldi.pdev), hwtid, qpid); + to = to << 2; + } + } while (!wr_waitp->done); + if (wr_waitp->ret) + printk(KERN_WARNING MOD "%s: FW reply %d tid %u qpid %u\n", + pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid); + return wr_waitp->ret; +} + + struct c4iw_dev { struct ib_device ibdev; struct c4iw_rdev rdev; @@ -327,6 +354,7 @@ struct c4iw_qp { struct c4iw_qp_attributes attr; struct t4_wq wq; spinlock_t lock; + struct mutex mutex; atomic_t refcnt; wait_queue_head_t wait; struct timer_list timer; @@ -579,12 +607,10 @@ struct c4iw_ep_common { struct c4iw_dev *dev; enum c4iw_ep_state state; struct kref kref; - spinlock_t lock; + struct mutex mutex; struct sockaddr_in local_addr; struct sockaddr_in remote_addr; - wait_queue_head_t waitq; - int rpl_done; - int rpl_err; + struct c4iw_wr_wait wr_wait; unsigned long flags; }; @@ -619,6 +645,7 @@ struct c4iw_ep { u16 plen; u16 rss_qid; u16 txq_idx; + u16 ctrlq_idx; u8 tos; }; @@ -653,8 +680,10 @@ int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_pblpool_create(struct c4iw_rdev *rdev); int c4iw_rqtpool_create(struct c4iw_rdev *rdev); +int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev); void c4iw_pblpool_destroy(struct c4iw_rdev *rdev); void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev); +void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev); void c4iw_destroy_resource(struct c4iw_resource *rscp); int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_register_device(struct c4iw_dev *dev); @@ -720,6 +749,8 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size); u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); +u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size); +void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size); int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb); void c4iw_flush_hw_cq(struct t4_cq *cq); void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 82b5703b8947..273ffe49525a 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -59,7 +59,7 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, wr_len = roundup(sizeof *req + sizeof *sc + roundup(copy_len, T4_ULPTX_MIN_IO), 16); - skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) return -ENOMEM; set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); @@ -71,7 +71,7 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, if (i == (num_wqe-1)) { req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | FW_WR_COMPL(1)); - req->wr.wr_lo = (__force __be64)&wr_wait; + req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait; } else req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR)); req->wr.wr_mid = cpu_to_be32( @@ -103,14 +103,7 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, len -= C4IW_MAX_INLINE_SIZE; } - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rdev->lldi.pdev)); - rdev->flags = T4_FATAL_ERROR; - ret = -EIO; - } else - ret = wr_wait.ret; + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); return ret; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 8f645c83a125..f66dd8bf5128 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -54,9 +54,9 @@ #include "iw_cxgb4.h" -static int fastreg_support; +static int fastreg_support = 1; module_param(fastreg_support, int, 0644); -MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=0)"); +MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); static int c4iw_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, @@ -149,19 +149,28 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) addr = mm->addr; kfree(mm); - if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) && - (addr < (pci_resource_start(rdev->lldi.pdev, 2) + - pci_resource_len(rdev->lldi.pdev, 2)))) { + if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) && + (addr < (pci_resource_start(rdev->lldi.pdev, 0) + + pci_resource_len(rdev->lldi.pdev, 0)))) { /* - * Map T4 DB register. + * MA_SYNC register... */ - if (vma->vm_flags & VM_READ) - return -EPERM; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - vma->vm_flags &= ~VM_MAYREAD; + ret = io_remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) && + (addr < (pci_resource_start(rdev->lldi.pdev, 2) + + pci_resource_len(rdev->lldi.pdev, 2)))) { + + /* + * Map user DB or OCQP memory... + */ + if (addr >= rdev->oc_mw_pa) + vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ret = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, len, vma->vm_page_prot); @@ -382,7 +391,17 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, static int c4iw_get_mib(struct ib_device *ibdev, union rdma_protocol_stats *stats) { - return -ENOSYS; + struct tp_tcp_stats v4, v6; + struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev); + + cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6); + memset(stats, 0, sizeof *stats); + stats->iw.tcpInSegs = v4.tcpInSegs + v6.tcpInSegs; + stats->iw.tcpOutSegs = v4.tcpOutSegs + v6.tcpOutSegs; + stats->iw.tcpRetransSegs = v4.tcpRetransSegs + v6.tcpRetransSegs; + stats->iw.tcpOutRsts = v4.tcpOutRsts + v6.tcpOutSegs; + + return 0; } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); @@ -472,6 +491,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.post_send = c4iw_post_send; dev->ibdev.post_recv = c4iw_post_receive; dev->ibdev.get_protocol_stats = c4iw_get_mib; + dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 7065cb310553..057cb2505ea1 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -31,6 +31,63 @@ */ #include "iw_cxgb4.h" +static int ocqp_support; +module_param(ocqp_support, int, 0644); +MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=0)"); + +static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) +{ + unsigned long flag; + spin_lock_irqsave(&qhp->lock, flag); + qhp->attr.state = state; + spin_unlock_irqrestore(&qhp->lock, flag); +} + +static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + c4iw_ocqp_pool_free(rdev, sq->dma_addr, sq->memsize); +} + +static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue, + pci_unmap_addr(sq, mapping)); +} + +static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + if (t4_sq_onchip(sq)) + dealloc_oc_sq(rdev, sq); + else + dealloc_host_sq(rdev, sq); +} + +static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + if (!ocqp_support || !t4_ocqp_supported()) + return -ENOSYS; + sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize); + if (!sq->dma_addr) + return -ENOMEM; + sq->phys_addr = rdev->oc_mw_pa + sq->dma_addr - + rdev->lldi.vr->ocq.start; + sq->queue = (__force union t4_wr *)(rdev->oc_mw_kva + sq->dma_addr - + rdev->lldi.vr->ocq.start); + sq->flags |= T4_SQ_ONCHIP; + return 0; +} + +static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize, + &(sq->dma_addr), GFP_KERNEL); + if (!sq->queue) + return -ENOMEM; + sq->phys_addr = virt_to_phys(sq->queue); + pci_unmap_addr_set(sq, mapping, sq->dma_addr); + return 0; +} + static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct c4iw_dev_ucontext *uctx) { @@ -41,9 +98,7 @@ static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, dma_free_coherent(&(rdev->lldi.pdev->dev), wq->rq.memsize, wq->rq.queue, dma_unmap_addr(&wq->rq, mapping)); - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->sq.memsize, wq->sq.queue, - dma_unmap_addr(&wq->sq, mapping)); + dealloc_sq(rdev, &wq->sq); c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); kfree(wq->rq.sw_rq); kfree(wq->sq.sw_sq); @@ -93,11 +148,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (!wq->rq.rqt_hwaddr) goto err4; - wq->sq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), - wq->sq.memsize, &(wq->sq.dma_addr), - GFP_KERNEL); - if (!wq->sq.queue) - goto err5; + if (user) { + if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq)) + goto err5; + } else + if (alloc_host_sq(rdev, &wq->sq)) + goto err5; memset(wq->sq.queue, 0, wq->sq.memsize); dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); @@ -130,7 +186,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + 2 * sizeof *res; - skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) { ret = -ENOMEM; goto err7; @@ -144,7 +200,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, V_FW_RI_RES_WR_NRES(2) | FW_WR_COMPL(1)); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (u64)&wr_wait; + res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; @@ -158,11 +214,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ + t4_sq_onchip(&wq->sq) ? F_FW_RI_RES_WR_ONCHIP : 0 | V_FW_RI_RES_WR_IQID(scq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | - V_FW_RI_RES_WR_FBMIN(3) | + V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(3) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | @@ -185,7 +242,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | - V_FW_RI_RES_WR_FBMIN(3) | + V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(3) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | @@ -198,14 +255,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, ret = c4iw_ofld_send(rdev, skb); if (ret) goto err7; - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rdev->lldi.pdev)); - rdev->flags = T4_FATAL_ERROR; - ret = -EIO; - } else - ret = wr_wait.ret; + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); if (ret) goto err7; @@ -219,9 +269,7 @@ err7: wq->rq.memsize, wq->rq.queue, dma_unmap_addr(&wq->rq, mapping)); err6: - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->sq.memsize, wq->sq.queue, - dma_unmap_addr(&wq->sq, mapping)); + dealloc_sq(rdev, &wq->sq); err5: c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); err4: @@ -235,12 +283,82 @@ err1: return -ENOMEM; } -static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, + struct ib_send_wr *wr, int max, u32 *plenp) +{ + u8 *dstp, *srcp; + u32 plen = 0; + int i; + int rem, len; + + dstp = (u8 *)immdp->data; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) > max) + return -EMSGSIZE; + srcp = (u8 *)(unsigned long)wr->sg_list[i].addr; + plen += wr->sg_list[i].length; + rem = wr->sg_list[i].length; + while (rem) { + if (dstp == (u8 *)&sq->queue[sq->size]) + dstp = (u8 *)sq->queue; + if (rem <= (u8 *)&sq->queue[sq->size] - dstp) + len = rem; + else + len = (u8 *)&sq->queue[sq->size] - dstp; + memcpy(dstp, srcp, len); + dstp += len; + srcp += len; + rem -= len; + } + } + len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp); + if (len) + memset(dstp, 0, len); + immdp->op = FW_RI_DATA_IMMD; + immdp->r1 = 0; + immdp->r2 = 0; + immdp->immdlen = cpu_to_be32(plen); + *plenp = plen; + return 0; +} + +static int build_isgl(__be64 *queue_start, __be64 *queue_end, + struct fw_ri_isgl *isglp, struct ib_sge *sg_list, + int num_sge, u32 *plenp) + { int i; + u32 plen = 0; + __be64 *flitp = (__be64 *)isglp->sge; + + for (i = 0; i < num_sge; i++) { + if ((plen + sg_list[i].length) < plen) + return -EMSGSIZE; + plen += sg_list[i].length; + *flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) | + sg_list[i].length); + if (++flitp == queue_end) + flitp = queue_start; + *flitp = cpu_to_be64(sg_list[i].addr); + if (++flitp == queue_end) + flitp = queue_start; + } + *flitp = (__force __be64)0; + isglp->op = FW_RI_DATA_ISGL; + isglp->r1 = 0; + isglp->nsge = cpu_to_be16(num_sge); + isglp->r2 = 0; + if (plenp) + *plenp = plen; + return 0; +} + +static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) +{ u32 plen; int size; - u8 *datap; + int ret; if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; @@ -267,43 +385,23 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) default: return -EINVAL; } + plen = 0; if (wr->num_sge) { if (wr->send_flags & IB_SEND_INLINE) { - datap = (u8 *)wqe->send.u.immd_src[0].data; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) > - T4_MAX_SEND_INLINE) { - return -EMSGSIZE; - } - plen += wr->sg_list[i].length; - memcpy(datap, - (void *)(unsigned long)wr->sg_list[i].addr, - wr->sg_list[i].length); - datap += wr->sg_list[i].length; - } - wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD; - wqe->send.u.immd_src[0].r1 = 0; - wqe->send.u.immd_src[0].r2 = 0; - wqe->send.u.immd_src[0].immdlen = cpu_to_be32(plen); + ret = build_immd(sq, wqe->send.u.immd_src, wr, + T4_MAX_SEND_INLINE, &plen); + if (ret) + return ret; size = sizeof wqe->send + sizeof(struct fw_ri_immd) + plen; } else { - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) - return -EMSGSIZE; - plen += wr->sg_list[i].length; - wqe->send.u.isgl_src[0].sge[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->send.u.isgl_src[0].sge[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->send.u.isgl_src[0].sge[i].to = - cpu_to_be64(wr->sg_list[i].addr); - } - wqe->send.u.isgl_src[0].op = FW_RI_DATA_ISGL; - wqe->send.u.isgl_src[0].r1 = 0; - wqe->send.u.isgl_src[0].nsge = cpu_to_be16(wr->num_sge); - wqe->send.u.isgl_src[0].r2 = 0; + ret = build_isgl((__be64 *)sq->queue, + (__be64 *)&sq->queue[sq->size], + wqe->send.u.isgl_src, + wr->sg_list, wr->num_sge, &plen); + if (ret) + return ret; size = sizeof wqe->send + sizeof(struct fw_ri_isgl) + wr->num_sge * sizeof(struct fw_ri_sge); } @@ -313,62 +411,40 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) wqe->send.u.immd_src[0].r2 = 0; wqe->send.u.immd_src[0].immdlen = 0; size = sizeof wqe->send + sizeof(struct fw_ri_immd); + plen = 0; } *len16 = DIV_ROUND_UP(size, 16); wqe->send.plen = cpu_to_be32(plen); return 0; } -static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) { - int i; u32 plen; int size; - u8 *datap; + int ret; - if (wr->num_sge > T4_MAX_WRITE_SGE) + if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; wqe->write.r2 = 0; wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); - plen = 0; if (wr->num_sge) { if (wr->send_flags & IB_SEND_INLINE) { - datap = (u8 *)wqe->write.u.immd_src[0].data; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) > - T4_MAX_WRITE_INLINE) { - return -EMSGSIZE; - } - plen += wr->sg_list[i].length; - memcpy(datap, - (void *)(unsigned long)wr->sg_list[i].addr, - wr->sg_list[i].length); - datap += wr->sg_list[i].length; - } - wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD; - wqe->write.u.immd_src[0].r1 = 0; - wqe->write.u.immd_src[0].r2 = 0; - wqe->write.u.immd_src[0].immdlen = cpu_to_be32(plen); + ret = build_immd(sq, wqe->write.u.immd_src, wr, + T4_MAX_WRITE_INLINE, &plen); + if (ret) + return ret; size = sizeof wqe->write + sizeof(struct fw_ri_immd) + plen; } else { - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) - return -EMSGSIZE; - plen += wr->sg_list[i].length; - wqe->write.u.isgl_src[0].sge[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->write.u.isgl_src[0].sge[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->write.u.isgl_src[0].sge[i].to = - cpu_to_be64(wr->sg_list[i].addr); - } - wqe->write.u.isgl_src[0].op = FW_RI_DATA_ISGL; - wqe->write.u.isgl_src[0].r1 = 0; - wqe->write.u.isgl_src[0].nsge = - cpu_to_be16(wr->num_sge); - wqe->write.u.isgl_src[0].r2 = 0; + ret = build_isgl((__be64 *)sq->queue, + (__be64 *)&sq->queue[sq->size], + wqe->write.u.isgl_src, + wr->sg_list, wr->num_sge, &plen); + if (ret) + return ret; size = sizeof wqe->write + sizeof(struct fw_ri_isgl) + wr->num_sge * sizeof(struct fw_ri_sge); } @@ -378,6 +454,7 @@ static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) wqe->write.u.immd_src[0].r2 = 0; wqe->write.u.immd_src[0].immdlen = 0; size = sizeof wqe->write + sizeof(struct fw_ri_immd); + plen = 0; } *len16 = DIV_ROUND_UP(size, 16); wqe->write.plen = cpu_to_be32(plen); @@ -416,41 +493,27 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, struct ib_recv_wr *wr, u8 *len16) { - int i; - int plen = 0; + int ret; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) - return -EMSGSIZE; - plen += wr->sg_list[i].length; - wqe->recv.isgl.sge[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->recv.isgl.sge[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->recv.isgl.sge[i].to = - cpu_to_be64(wr->sg_list[i].addr); - } - for (; i < T4_MAX_RECV_SGE; i++) { - wqe->recv.isgl.sge[i].stag = 0; - wqe->recv.isgl.sge[i].len = 0; - wqe->recv.isgl.sge[i].to = 0; - } - wqe->recv.isgl.op = FW_RI_DATA_ISGL; - wqe->recv.isgl.r1 = 0; - wqe->recv.isgl.nsge = cpu_to_be16(wr->num_sge); - wqe->recv.isgl.r2 = 0; + ret = build_isgl((__be64 *)qhp->wq.rq.queue, + (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size], + &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); + if (ret) + return ret; *len16 = DIV_ROUND_UP(sizeof wqe->recv + wr->num_sge * sizeof(struct fw_ri_sge), 16); return 0; } -static int build_fastreg(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) { struct fw_ri_immd *imdp; __be64 *p; int i; int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); + int rem; if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH) return -EINVAL; @@ -465,32 +528,28 @@ static int build_fastreg(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); - if (pbllen > T4_MAX_FR_IMMD) { - struct c4iw_fr_page_list *c4pl = - to_c4iw_fr_page_list(wr->wr.fast_reg.page_list); - struct fw_ri_dsgl *sglp; - - sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); - sglp->op = FW_RI_DATA_DSGL; - sglp->r1 = 0; - sglp->nsge = cpu_to_be16(1); - sglp->addr0 = cpu_to_be64(c4pl->dma_addr); - sglp->len0 = cpu_to_be32(pbllen); - - *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *sglp, 16); - } else { - imdp = (struct fw_ri_immd *)(&wqe->fr + 1); - imdp->op = FW_RI_DATA_IMMD; - imdp->r1 = 0; - imdp->r2 = 0; - imdp->immdlen = cpu_to_be32(pbllen); - p = (__be64 *)(imdp + 1); - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) - *p = cpu_to_be64( - (u64)wr->wr.fast_reg.page_list->page_list[i]); - *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, - 16); + WARN_ON(pbllen > T4_MAX_FR_IMMD); + imdp = (struct fw_ri_immd *)(&wqe->fr + 1); + imdp->op = FW_RI_DATA_IMMD; + imdp->r1 = 0; + imdp->r2 = 0; + imdp->immdlen = cpu_to_be32(pbllen); + p = (__be64 *)(imdp + 1); + rem = pbllen; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + rem -= sizeof *p; + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; } + BUG_ON(rem < 0); + while (rem) { + *p = 0; + rem -= sizeof *p; + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, 16); return 0; } @@ -547,7 +606,9 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, *bad_wr = wr; break; } - wqe = &qhp->wq.sq.queue[qhp->wq.sq.pidx]; + wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + + qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); + fw_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; @@ -564,19 +625,19 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, swsqe->opcode = FW_RI_SEND; else swsqe->opcode = FW_RI_SEND_WITH_INV; - err = build_rdma_send(wqe, wr, &len16); + err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); break; case IB_WR_RDMA_WRITE: fw_opcode = FW_RI_RDMA_WRITE_WR; swsqe->opcode = FW_RI_RDMA_WRITE; - err = build_rdma_write(wqe, wr, &len16); + err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16); break; case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) - fw_flags |= FW_RI_RDMA_READ_INVALIDATE; + fw_flags = FW_RI_RDMA_READ_INVALIDATE; else fw_flags = 0; err = build_rdma_read(wqe, wr, &len16); @@ -589,7 +650,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_FAST_REG_MR: fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_fastreg(wqe, wr, &len16); + err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) @@ -619,8 +680,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, swsqe->opcode, swsqe->read_len); wr = wr->next; num_wrs--; - t4_sq_produce(&qhp->wq); - idx++; + t4_sq_produce(&qhp->wq, len16); + idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } if (t4_wq_db_enabled(&qhp->wq)) t4_ring_sq_db(&qhp->wq, idx); @@ -656,7 +717,9 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, *bad_wr = wr; break; } - wqe = &qhp->wq.rq.queue[qhp->wq.rq.pidx]; + wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue + + qhp->wq.rq.wq_pidx * + T4_EQ_ENTRY_SIZE); if (num_wrs) err = build_rdma_recv(qhp, wqe, wr, &len16); else @@ -675,15 +738,12 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, wqe->recv.r2[1] = 0; wqe->recv.r2[2] = 0; wqe->recv.len16 = len16; - if (len16 < 5) - wqe->flits[8] = 0; - PDBG("%s cookie 0x%llx pidx %u\n", __func__, (unsigned long long) wr->wr_id, qhp->wq.rq.pidx); - t4_rq_produce(&qhp->wq); + t4_rq_produce(&qhp->wq, len16); + idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); wr = wr->next; num_wrs--; - idx++; } if (t4_wq_db_enabled(&qhp->wq)) t4_ring_rq_db(&qhp->wq, idx); @@ -895,46 +955,38 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, * Assumes qhp lock is held. */ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, - struct c4iw_cq *schp, unsigned long *flag) + struct c4iw_cq *schp) { int count; int flushed; + unsigned long flag; PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); - /* take a ref on the qhp since we must release the lock */ - atomic_inc(&qhp->refcnt); - spin_unlock_irqrestore(&qhp->lock, *flag); - /* locking heirarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&rchp->lock, *flag); + /* locking hierarchy: cq lock first, then qp lock. */ + spin_lock_irqsave(&rchp->lock, flag); spin_lock(&qhp->lock); c4iw_flush_hw_cq(&rchp->cq); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&rchp->lock, *flag); + spin_unlock_irqrestore(&rchp->lock, flag); if (flushed) (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - /* locking heirarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&schp->lock, *flag); + /* locking hierarchy: cq lock first, then qp lock. */ + spin_lock_irqsave(&schp->lock, flag); spin_lock(&qhp->lock); c4iw_flush_hw_cq(&schp->cq); c4iw_count_scqes(&schp->cq, &qhp->wq, &count); flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&schp->lock, *flag); + spin_unlock_irqrestore(&schp->lock, flag); if (flushed) (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - - /* deref */ - if (atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); - - spin_lock_irqsave(&qhp->lock, *flag); } -static void flush_qp(struct c4iw_qp *qhp, unsigned long *flag) +static void flush_qp(struct c4iw_qp *qhp) { struct c4iw_cq *rchp, *schp; @@ -948,23 +1000,23 @@ static void flush_qp(struct c4iw_qp *qhp, unsigned long *flag) t4_set_cq_in_error(&schp->cq); return; } - __flush_qp(qhp, rchp, schp, flag); + __flush_qp(qhp, rchp, schp); } -static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp) +static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + struct c4iw_ep *ep) { struct fw_ri_wr *wqe; int ret; - struct c4iw_wr_wait wr_wait; struct sk_buff *skb; PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, - qhp->ep->hwtid); + ep->hwtid); - skb = alloc_skb(sizeof *wqe, GFP_KERNEL | __GFP_NOFAIL); + skb = alloc_skb(sizeof *wqe, GFP_KERNEL); if (!skb) return -ENOMEM; - set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof *wqe); @@ -972,30 +1024,18 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp) FW_WR_OP(FW_RI_INIT_WR) | FW_WR_COMPL(1)); wqe->flowid_len16 = cpu_to_be32( - FW_WR_FLOWID(qhp->ep->hwtid) | + FW_WR_FLOWID(ep->hwtid) | FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); - wqe->cookie = (u64)&wr_wait; + wqe->cookie = (unsigned long) &ep->com.wr_wait; wqe->u.fini.type = FW_RI_TYPE_FINI; - c4iw_init_wr_wait(&wr_wait); + c4iw_init_wr_wait(&ep->com.wr_wait); ret = c4iw_ofld_send(&rhp->rdev, skb); if (ret) goto out; - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rhp->rdev.lldi.pdev)); - rhp->rdev.flags = T4_FATAL_ERROR; - ret = -EIO; - } else { - ret = wr_wait.ret; - if (ret) - printk(KERN_WARNING MOD - "%s: Abnormal close qpid %d ret %u\n", - pci_name(rhp->rdev.lldi.pdev), qhp->wq.sq.qid, - ret); - } + ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid, + qhp->wq.sq.qid, __func__); out: PDBG("%s ret %d\n", __func__, ret); return ret; @@ -1029,13 +1069,12 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) { struct fw_ri_wr *wqe; int ret; - struct c4iw_wr_wait wr_wait; struct sk_buff *skb; PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, qhp->ep->hwtid); - skb = alloc_skb(sizeof *wqe, GFP_KERNEL | __GFP_NOFAIL); + skb = alloc_skb(sizeof *wqe, GFP_KERNEL); if (!skb) return -ENOMEM; set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); @@ -1049,7 +1088,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) FW_WR_FLOWID(qhp->ep->hwtid) | FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); - wqe->cookie = (u64)&wr_wait; + wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait; wqe->u.init.type = FW_RI_TYPE_INIT; wqe->u.init.mpareqbit_p2ptype = @@ -1086,19 +1125,13 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) if (qhp->attr.mpa_attr.initiator) build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); - c4iw_init_wr_wait(&wr_wait); + c4iw_init_wr_wait(&qhp->ep->com.wr_wait); ret = c4iw_ofld_send(&rhp->rdev, skb); if (ret) goto out; - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rhp->rdev.lldi.pdev)); - rhp->rdev.flags = T4_FATAL_ERROR; - ret = -EIO; - } else - ret = wr_wait.ret; + ret = c4iw_wait_for_reply(&rhp->rdev, &qhp->ep->com.wr_wait, + qhp->ep->hwtid, qhp->wq.sq.qid, __func__); out: PDBG("%s ret %d\n", __func__, ret); return ret; @@ -1111,7 +1144,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, { int ret = 0; struct c4iw_qp_attributes newattr = qhp->attr; - unsigned long flag; int disconnect = 0; int terminate = 0; int abort = 0; @@ -1122,7 +1154,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state, (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); - spin_lock_irqsave(&qhp->lock, flag); + mutex_lock(&qhp->mutex); /* Process attr changes if in IDLE */ if (mask & C4IW_QP_ATTR_VALID_MODIFY) { @@ -1173,7 +1205,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp->attr.mpa_attr = attrs->mpa_attr; qhp->attr.llp_stream_handle = attrs->llp_stream_handle; qhp->ep = qhp->attr.llp_stream_handle; - qhp->attr.state = C4IW_QP_STATE_RTS; + set_state(qhp, C4IW_QP_STATE_RTS); /* * Ref the endpoint here and deref when we @@ -1182,15 +1214,13 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, * transition. */ c4iw_get_ep(&qhp->ep->com); - spin_unlock_irqrestore(&qhp->lock, flag); ret = rdma_init(rhp, qhp); - spin_lock_irqsave(&qhp->lock, flag); if (ret) goto err; break; case C4IW_QP_STATE_ERROR: - qhp->attr.state = C4IW_QP_STATE_ERROR; - flush_qp(qhp, &flag); + set_state(qhp, C4IW_QP_STATE_ERROR); + flush_qp(qhp); break; default: ret = -EINVAL; @@ -1201,39 +1231,38 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); - qhp->attr.state = C4IW_QP_STATE_CLOSING; + set_state(qhp, C4IW_QP_STATE_CLOSING); + ep = qhp->ep; if (!internal) { abort = 0; disconnect = 1; - ep = qhp->ep; - c4iw_get_ep(&ep->com); + c4iw_get_ep(&qhp->ep->com); } - spin_unlock_irqrestore(&qhp->lock, flag); - ret = rdma_fini(rhp, qhp); - spin_lock_irqsave(&qhp->lock, flag); + ret = rdma_fini(rhp, qhp, ep); if (ret) { - ep = qhp->ep; - c4iw_get_ep(&ep->com); + if (internal) + c4iw_get_ep(&qhp->ep->com); disconnect = abort = 1; goto err; } break; case C4IW_QP_STATE_TERMINATE: - qhp->attr.state = C4IW_QP_STATE_TERMINATE; + set_state(qhp, C4IW_QP_STATE_TERMINATE); if (qhp->ibqp.uobject) t4_set_wq_in_error(&qhp->wq); ep = qhp->ep; - c4iw_get_ep(&ep->com); - terminate = 1; + if (!internal) + terminate = 1; disconnect = 1; + c4iw_get_ep(&qhp->ep->com); break; case C4IW_QP_STATE_ERROR: - qhp->attr.state = C4IW_QP_STATE_ERROR; + set_state(qhp, C4IW_QP_STATE_ERROR); if (!internal) { abort = 1; disconnect = 1; ep = qhp->ep; - c4iw_get_ep(&ep->com); + c4iw_get_ep(&qhp->ep->com); } goto err; break; @@ -1249,8 +1278,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } switch (attrs->next_state) { case C4IW_QP_STATE_IDLE: - flush_qp(qhp, &flag); - qhp->attr.state = C4IW_QP_STATE_IDLE; + flush_qp(qhp); + set_state(qhp, C4IW_QP_STATE_IDLE); qhp->attr.llp_stream_handle = NULL; c4iw_put_ep(&qhp->ep->com); qhp->ep = NULL; @@ -1272,7 +1301,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, ret = -EINVAL; goto out; } - qhp->attr.state = C4IW_QP_STATE_IDLE; + set_state(qhp, C4IW_QP_STATE_IDLE); break; case C4IW_QP_STATE_TERMINATE: if (!internal) { @@ -1295,15 +1324,16 @@ err: /* disassociate the LLP connection */ qhp->attr.llp_stream_handle = NULL; - ep = qhp->ep; + if (!ep) + ep = qhp->ep; qhp->ep = NULL; - qhp->attr.state = C4IW_QP_STATE_ERROR; + set_state(qhp, C4IW_QP_STATE_ERROR); free = 1; wake_up(&qhp->wait); BUG_ON(!ep); - flush_qp(qhp, &flag); + flush_qp(qhp); out: - spin_unlock_irqrestore(&qhp->lock, flag); + mutex_unlock(&qhp->mutex); if (terminate) post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL); @@ -1325,7 +1355,6 @@ out: */ if (free) c4iw_put_ep(&ep->com); - PDBG("%s exit state %d\n", __func__, qhp->attr.state); return ret; } @@ -1370,7 +1399,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; - struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4; + struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; PDBG("%s ib_pd %p\n", __func__, pd); @@ -1440,6 +1469,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.max_ord = 1; qhp->attr.max_ird = 1; spin_lock_init(&qhp->lock); + mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); @@ -1468,7 +1498,15 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ret = -ENOMEM; goto err6; } - + if (t4_sq_onchip(&qhp->wq.sq)) { + mm5 = kmalloc(sizeof *mm5, GFP_KERNEL); + if (!mm5) { + ret = -ENOMEM; + goto err7; + } + uresp.flags = C4IW_QPF_ONCHIP; + } else + uresp.flags = 0; uresp.qid_mask = rhp->rdev.qpmask; uresp.sqid = qhp->wq.sq.qid; uresp.sq_size = qhp->wq.sq.size; @@ -1477,6 +1515,10 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, uresp.rq_size = qhp->wq.rq.size; uresp.rq_memsize = qhp->wq.rq.memsize; spin_lock(&ucontext->mmap_lock); + if (mm5) { + uresp.ma_sync_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; uresp.rq_key = ucontext->key; @@ -1488,9 +1530,9 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) - goto err7; + goto err8; mm1->key = uresp.sq_key; - mm1->addr = virt_to_phys(qhp->wq.sq.queue); + mm1->addr = qhp->wq.sq.phys_addr; mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); insert_mmap(ucontext, mm1); mm2->key = uresp.rq_key; @@ -1505,6 +1547,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mm4->addr = qhp->wq.rq.udb; mm4->len = PAGE_SIZE; insert_mmap(ucontext, mm4); + if (mm5) { + mm5->key = uresp.ma_sync_key; + mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0) + + A_PCIE_MA_SYNC) & PAGE_MASK; + mm5->len = PAGE_SIZE; + insert_mmap(ucontext, mm5); + } } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); @@ -1512,6 +1561,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.sq.qid); return &qhp->ibqp; +err8: + kfree(mm5); err7: kfree(mm4); err6: diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index fb195d1d9015..4fb50d58b493 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -110,11 +110,12 @@ static int c4iw_init_qid_fifo(struct c4iw_rdev *rdev) spin_lock_init(&rdev->resource.qid_fifo_lock); - if (kfifo_alloc(&rdev->resource.qid_fifo, T4_MAX_QIDS * sizeof(u32), - GFP_KERNEL)) + if (kfifo_alloc(&rdev->resource.qid_fifo, rdev->lldi.vr->qp.size * + sizeof(u32), GFP_KERNEL)) return -ENOMEM; - for (i = T4_QID_BASE; i < T4_QID_BASE + T4_MAX_QIDS; i++) + for (i = rdev->lldi.vr->qp.start; + i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++) if (!(i & rdev->qpmask)) kfifo_in(&rdev->resource.qid_fifo, (unsigned char *) &i, sizeof(u32)); @@ -310,6 +311,9 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) { unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size); PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + if (!addr && printk_ratelimit()) + printk(KERN_WARNING MOD "%s: Out of PBL memory\n", + pci_name(rdev->lldi.pdev)); return (u32)addr; } @@ -369,6 +373,9 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) { unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6); PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); + if (!addr && printk_ratelimit()) + printk(KERN_WARNING MOD "%s: Out of RQT memory\n", + pci_name(rdev->lldi.pdev)); return (u32)addr; } @@ -415,3 +422,59 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) { gen_pool_destroy(rdev->rqt_pool); } + +/* + * On-Chip QP Memory. + */ +#define MIN_OCQP_SHIFT 12 /* 4KB == min ocqp size */ + +u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size) +{ + unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + return (u32)addr; +} + +void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size) +{ + PDBG("%s addr 0x%x size %d\n", __func__, addr, size); + gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size); +} + +int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev) +{ + unsigned start, chunk, top; + + rdev->ocqp_pool = gen_pool_create(MIN_OCQP_SHIFT, -1); + if (!rdev->ocqp_pool) + return -ENOMEM; + + start = rdev->lldi.vr->ocq.start; + chunk = rdev->lldi.vr->ocq.size; + top = start + chunk; + + while (start < top) { + chunk = min(top - start + 1, chunk); + if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) { + PDBG("%s failed to add OCQP chunk (%x/%x)\n", + __func__, start, chunk); + if (chunk <= 1024 << MIN_OCQP_SHIFT) { + printk(KERN_WARNING MOD + "Failed to add all OCQP chunks (%x/%x)\n", + start, top - start); + return 0; + } + chunk >>= 1; + } else { + PDBG("%s added OCQP chunk (%x/%x)\n", + __func__, start, chunk); + start += chunk; + } + } + return 0; +} + +void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev) +{ + gen_pool_destroy(rdev->ocqp_pool); +} diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 9cf8d85bfcff..70004425d695 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -36,8 +36,6 @@ #include "t4_msg.h" #include "t4fw_ri_api.h" -#define T4_QID_BASE 1024 -#define T4_MAX_QIDS 256 #define T4_MAX_NUM_QP (1<<16) #define T4_MAX_NUM_CQ (1<<15) #define T4_MAX_NUM_PD (1<<15) @@ -54,6 +52,7 @@ #define T4_STAG_UNSET 0xffffffff #define T4_FW_MAJ 0 #define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1) +#define A_PCIE_MA_SYNC 0x30b4 struct t4_status_page { __be32 rsvd1; /* flit 0 - hw owns */ @@ -65,10 +64,10 @@ struct t4_status_page { u8 db_off; }; -#define T4_EQ_SIZE 64 +#define T4_EQ_ENTRY_SIZE 64 -#define T4_SQ_NUM_SLOTS 4 -#define T4_SQ_NUM_BYTES (T4_EQ_SIZE * T4_SQ_NUM_SLOTS) +#define T4_SQ_NUM_SLOTS 5 +#define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS) #define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) #define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ @@ -80,11 +79,11 @@ struct t4_status_page { sizeof(struct fw_ri_rdma_write_wr) - \ sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ - sizeof(struct fw_ri_immd))) + sizeof(struct fw_ri_immd)) & ~31UL) #define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) #define T4_RQ_NUM_SLOTS 2 -#define T4_RQ_NUM_BYTES (T4_EQ_SIZE * T4_RQ_NUM_SLOTS) +#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) #define T4_MAX_RECV_SGE 4 union t4_wr { @@ -97,20 +96,18 @@ union t4_wr { struct fw_ri_fr_nsmr_wr fr; struct fw_ri_inv_lstag_wr inv; struct t4_status_page status; - __be64 flits[T4_EQ_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; + __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; }; union t4_recv_wr { struct fw_ri_recv_wr recv; struct t4_status_page status; - __be64 flits[T4_EQ_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS]; + __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS]; }; static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid, enum fw_wr_opcodes opcode, u8 flags, u8 len16) { - int slots_used; - wqe->send.opcode = (u8)opcode; wqe->send.flags = flags; wqe->send.wrid = wrid; @@ -118,12 +115,6 @@ static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid, wqe->send.r1[1] = 0; wqe->send.r1[2] = 0; wqe->send.len16 = len16; - - slots_used = DIV_ROUND_UP(len16*16, T4_EQ_SIZE); - while (slots_used < T4_SQ_NUM_SLOTS) { - wqe->flits[slots_used * T4_EQ_SIZE / sizeof(__be64)] = 0; - slots_used++; - } } /* CQE/AE status codes */ @@ -276,10 +267,36 @@ struct t4_swsqe { u16 idx; }; +static inline pgprot_t t4_pgprot_wc(pgprot_t prot) +{ +#if defined(__i386__) || defined(__x86_64__) + return pgprot_writecombine(prot); +#elif defined(CONFIG_PPC64) + return __pgprot((pgprot_val(prot) | _PAGE_NO_CACHE) & + ~(pgprot_t)_PAGE_GUARDED); +#else + return pgprot_noncached(prot); +#endif +} + +static inline int t4_ocqp_supported(void) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) + return 1; +#else + return 0; +#endif +} + +enum { + T4_SQ_ONCHIP = (1<<0), +}; + struct t4_sq { union t4_wr *queue; dma_addr_t dma_addr; DEFINE_DMA_UNMAP_ADDR(mapping); + unsigned long phys_addr; struct t4_swsqe *sw_sq; struct t4_swsqe *oldest_read; u64 udb; @@ -289,6 +306,8 @@ struct t4_sq { u16 size; u16 cidx; u16 pidx; + u16 wq_pidx; + u16 flags; }; struct t4_swrqe { @@ -310,6 +329,7 @@ struct t4_rq { u16 size; u16 cidx; u16 pidx; + u16 wq_pidx; }; struct t4_wq { @@ -340,11 +360,14 @@ static inline u32 t4_rq_avail(struct t4_wq *wq) return wq->rq.size - 1 - wq->rq.in_use; } -static inline void t4_rq_produce(struct t4_wq *wq) +static inline void t4_rq_produce(struct t4_wq *wq, u8 len16) { wq->rq.in_use++; if (++wq->rq.pidx == wq->rq.size) wq->rq.pidx = 0; + wq->rq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + if (wq->rq.wq_pidx >= wq->rq.size * T4_RQ_NUM_SLOTS) + wq->rq.wq_pidx %= wq->rq.size * T4_RQ_NUM_SLOTS; } static inline void t4_rq_consume(struct t4_wq *wq) @@ -355,6 +378,11 @@ static inline void t4_rq_consume(struct t4_wq *wq) wq->rq.cidx = 0; } +static inline int t4_sq_onchip(struct t4_sq *sq) +{ + return sq->flags & T4_SQ_ONCHIP; +} + static inline int t4_sq_empty(struct t4_wq *wq) { return wq->sq.in_use == 0; @@ -370,11 +398,14 @@ static inline u32 t4_sq_avail(struct t4_wq *wq) return wq->sq.size - 1 - wq->sq.in_use; } -static inline void t4_sq_produce(struct t4_wq *wq) +static inline void t4_sq_produce(struct t4_wq *wq, u8 len16) { wq->sq.in_use++; if (++wq->sq.pidx == wq->sq.size) wq->sq.pidx = 0; + wq->sq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + if (wq->sq.wq_pidx >= wq->sq.size * T4_SQ_NUM_SLOTS) + wq->sq.wq_pidx %= wq->sq.size * T4_SQ_NUM_SLOTS; } static inline void t4_sq_consume(struct t4_wq *wq) @@ -386,44 +417,39 @@ static inline void t4_sq_consume(struct t4_wq *wq) static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) { - inc *= T4_SQ_NUM_SLOTS; wmb(); writel(QID(wq->sq.qid) | PIDX(inc), wq->db); } static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc) { - inc *= T4_RQ_NUM_SLOTS; wmb(); writel(QID(wq->rq.qid) | PIDX(inc), wq->db); } static inline int t4_wq_in_error(struct t4_wq *wq) { - return wq->sq.queue[wq->sq.size].status.qp_err; + return wq->rq.queue[wq->rq.size].status.qp_err; } static inline void t4_set_wq_in_error(struct t4_wq *wq) { - wq->sq.queue[wq->sq.size].status.qp_err = 1; wq->rq.queue[wq->rq.size].status.qp_err = 1; } static inline void t4_disable_wq_db(struct t4_wq *wq) { - wq->sq.queue[wq->sq.size].status.db_off = 1; wq->rq.queue[wq->rq.size].status.db_off = 1; } static inline void t4_enable_wq_db(struct t4_wq *wq) { - wq->sq.queue[wq->sq.size].status.db_off = 0; wq->rq.queue[wq->rq.size].status.db_off = 0; } static inline int t4_wq_db_enabled(struct t4_wq *wq) { - return !wq->sq.queue[wq->sq.size].status.db_off; + return !wq->rq.queue[wq->rq.size].status.db_off; } struct t4_cq { diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index fc706bd07fae..dc193c292671 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -826,4 +826,14 @@ struct ulptx_idata { #define S_ULPTX_NSGE 0 #define M_ULPTX_NSGE 0xFFFF #define V_ULPTX_NSGE(x) ((x) << S_ULPTX_NSGE) + +#define S_RX_DACK_MODE 29 +#define M_RX_DACK_MODE 0x3 +#define V_RX_DACK_MODE(x) ((x) << S_RX_DACK_MODE) +#define G_RX_DACK_MODE(x) (((x) >> S_RX_DACK_MODE) & M_RX_DACK_MODE) + +#define S_RX_DACK_CHANGE 31 +#define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE) +#define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U) + #endif /* _T4FW_RI_API_H_ */ diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index ed6414abde02..e6669d54770e 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -50,7 +50,13 @@ struct c4iw_create_cq_resp { __u32 qid_mask; }; + +enum { + C4IW_QPF_ONCHIP = (1<<0) +}; + struct c4iw_create_qp_resp { + __u64 ma_sync_key; __u64 sq_key; __u64 rq_key; __u64 sq_db_gts_key; @@ -62,5 +68,6 @@ struct c4iw_create_qp_resp { __u32 sq_size; __u32 rq_size; __u32 qid_mask; + __u32 flags; }; #endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 0136abd50dd4..aaf6023a4835 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -112,7 +112,7 @@ struct ehca_sport { struct ehca_shca { struct ib_device ib_device; - struct of_device *ofdev; + struct platform_device *ofdev; u8 num_ports; int hw_level; struct list_head shca_list; diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 3b87589b8ea0..d9b1bb40f480 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -122,21 +122,21 @@ int ehca_create_eq(struct ehca_shca *shca, /* register interrupt handlers and initialize work queues */ if (type == EHCA_EQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, IRQF_DISABLED, "ehca_eq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); - - tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); } else if (type == EHCA_NEQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, IRQF_DISABLED, "ehca_neq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); - - tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); } eq->is_initialized = 1; diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index ecb51b396c42..c240e9972cb0 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -360,7 +360,8 @@ static int ehca_sense_attributes(struct ehca_shca *shca) * a firmware property, so it's valid across all adapters */ if (ehca_lock_hcalls == -1) - ehca_lock_hcalls = !(shca->hca_cap & HCA_CAP_H_ALLOC_RES_SYNC); + ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC, + shca->hca_cap); /* translate supported MR page sizes; always support 4K */ shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; @@ -712,7 +713,7 @@ static struct attribute_group ehca_dev_attr_grp = { .attrs = ehca_dev_attrs }; -static int __devinit ehca_probe(struct of_device *dev, +static int __devinit ehca_probe(struct platform_device *dev, const struct of_device_id *id) { struct ehca_shca *shca; @@ -878,7 +879,7 @@ probe1: return -EINVAL; } -static int __devexit ehca_remove(struct of_device *dev) +static int __devexit ehca_remove(struct platform_device *dev) { struct ehca_shca *shca = dev_get_drvdata(&dev->dev); unsigned long flags; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 31a68b9c52d0..43cae84005f0 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -171,7 +171,7 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) } ret = ehca_reg_maxmr(shca, e_maxmr, - (void *)ehca_map_vaddr((void *)KERNELBASE), + (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), mr_access_flags, e_pd, &e_maxmr->ib.ib_mr.lkey, &e_maxmr->ib.ib_mr.rkey); @@ -933,11 +933,6 @@ int ehca_unmap_fmr(struct list_head *fmr_list) /* check all FMR belong to same SHCA, and check internal flag */ list_for_each_entry(ib_fmr, fmr_list, list) { prev_shca = shca; - if (!ib_fmr) { - ehca_gen_err("bad fmr=%p in list", ib_fmr); - ret = -EINVAL; - goto unmap_fmr_exit0; - } shca = container_of(ib_fmr->device, struct ehca_shca, ib_device); e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); @@ -1641,7 +1636,7 @@ int ehca_reg_internal_maxmr( /* register internal max-MR on HCA */ size_maxmr = ehca_mr_len; - iova_start = (u64 *)ehca_map_vaddr((void *)KERNELBASE); + iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); ib_pbuf.addr = 0; ib_pbuf.size = size_maxmr; num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, @@ -2214,7 +2209,7 @@ int ehca_mr_is_maxmr(u64 size, { /* a MR is treated as max-MR only if it fits following: */ if ((size == ehca_mr_len) && - (iova_start == (void *)ehca_map_vaddr((void *)KERNELBASE))) { + (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { ehca_gen_dbg("this is a max-MR"); return 1; } else diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 47d388ec1cde..32fb34201aba 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -251,7 +251,7 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) return ST_UD; case IB_QPT_RAW_IPV6: return -EINVAL; - case IB_QPT_RAW_ETY: + case IB_QPT_RAW_ETHERTYPE: return -EINVAL; default: ehca_gen_err("Invalid ibqptype=%x", ibqptype); diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 4d5dc3304d42..e6f9cdd94c7a 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -269,6 +269,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, struct ehca_cq *cq, struct ehca_alloc_cq_parms *param) { + int rc; u64 ret; unsigned long outs[PLPAR_HCALL9_BUFSIZE]; @@ -283,8 +284,19 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, param->act_nr_of_entries = (u32)outs[3]; param->act_pages = (u32)outs[4]; - if (ret == H_SUCCESS) - hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[5]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. ret=%lli", ret); @@ -295,6 +307,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, struct ehca_alloc_qp_parms *parms, int is_user) { + int rc; u64 ret; u64 allocate_controls, max_r10_reg, r11, r12; unsigned long outs[PLPAR_HCALL9_BUFSIZE]; @@ -358,8 +371,19 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, parms->rqueue.queue_size = (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); - if (ret == H_SUCCESS) - hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[6]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + parms->qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. ret=%lli", ret); diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 39c1c3618ec7..a46e514c367b 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -49,7 +49,7 @@ #include "hipz_hw.h" /* - * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initalize + * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize * resources, create the empty EQPT (ring). */ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c index b3e0e72e8a73..077376ff3d28 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -42,10 +42,9 @@ #include "ehca_classes.h" #include "hipz_hw.h" -int hcall_map_page(u64 physaddr, u64 *mapaddr) +u64 hcall_map_page(u64 physaddr) { - *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE)); - return 0; + return (u64)ioremap(physaddr, EHCA_PAGESIZE); } int hcall_unmap_page(u64 mapaddr) @@ -58,9 +57,9 @@ int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user) { if (!is_user) { - int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); - if (ret) - return ret; + galpas->kernel.fw_handle = hcall_map_page(paddr_kernel); + if (!galpas->kernel.fw_handle) + return -ENOMEM; } else galpas->kernel.fw_handle = 0; diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h index 204227d5303a..d1b029910249 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.h +++ b/drivers/infiniband/hw/ehca/hcp_phyp.h @@ -83,7 +83,7 @@ int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, int hcp_galpas_dtor(struct h_galpas *galpas); -int hcall_map_page(u64 physaddr, u64 * mapaddr); +u64 hcall_map_page(u64 physaddr); int hcall_unmap_page(u64 mapaddr); diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index fa3df82681df..4496f2820c92 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -1,4 +1,4 @@ -EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \ +ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \ -DIPATH_KERN_TYPE=0 obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index d4ce8b63e19e..daef61d5e5bb 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -65,7 +65,8 @@ static const struct file_operations diag_file_ops = { .write = ipath_diag_write, .read = ipath_diag_read, .open = ipath_diag_open, - .release = ipath_diag_release + .release = ipath_diag_release, + .llseek = default_llseek, }; static ssize_t ipath_diagpkt_write(struct file *fp, @@ -75,6 +76,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp, static const struct file_operations diagpkt_file_ops = { .owner = THIS_MODULE, .write = ipath_diagpkt_write, + .llseek = noop_llseek, }; static atomic_t diagpkt_count = ATOMIC_INIT(0); diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 21337468c652..765f0fc1da76 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -390,6 +390,8 @@ done: ipath_enable_armlaunch(dd); } +static void cleanup_device(struct ipath_devdata *dd); + static int __devinit ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -616,8 +618,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, goto bail; bail_irqsetup: - if (pdev->irq) - free_irq(pdev->irq, dd); + cleanup_device(dd); + + if (dd->ipath_irq) + dd->ipath_f_free_irq(dd); + + if (dd->ipath_f_cleanup) + dd->ipath_f_cleanup(dd); bail_iounmap: iounmap((volatile void __iomem *) dd->ipath_kregbase); @@ -635,7 +642,7 @@ bail: return ret; } -static void __devexit cleanup_device(struct ipath_devdata *dd) +static void cleanup_device(struct ipath_devdata *dd) { int port; struct ipath_portdata **tmp; diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 9c5c66d16a23..6078992da3f0 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -63,7 +63,8 @@ static const struct file_operations ipath_file_ops = { .open = ipath_open, .release = ipath_close, .poll = ipath_poll, - .mmap = ipath_mmap + .mmap = ipath_mmap, + .llseek = noop_llseek, }; /* @@ -2055,7 +2056,7 @@ static int ipath_close(struct inode *in, struct file *fp) mutex_lock(&ipath_mutex); - fd = (struct ipath_filedata *) fp->private_data; + fd = fp->private_data; fp->private_data = NULL; pd = fd->pd; if (!pd) { diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 2fca70836dae..8c8afc716b98 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -57,6 +57,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, goto bail; } + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_private = data; @@ -103,6 +104,7 @@ static ssize_t atomic_stats_read(struct file *file, char __user *buf, static const struct file_operations atomic_stats_ops = { .read = atomic_stats_read, + .llseek = default_llseek, }; static ssize_t atomic_counters_read(struct file *file, char __user *buf, @@ -120,6 +122,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, static const struct file_operations atomic_counters_ops = { .read = atomic_counters_read, + .llseek = default_llseek, }; static ssize_t flash_read(struct file *file, char __user *buf, @@ -224,6 +227,7 @@ bail: static const struct file_operations flash_ops = { .read = flash_read, .write = flash_write, + .llseek = default_llseek, }; static int create_device_files(struct super_block *sb, @@ -358,13 +362,13 @@ bail: return ret; } -static int ipathfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ipathfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - int ret = get_sb_single(fs_type, flags, data, - ipathfs_fill_super, mnt); - if (ret >= 0) - ipath_super = mnt->mnt_sb; + struct dentry *ret; + ret = mount_single(fs_type, flags, data, ipathfs_fill_super); + if (!IS_ERR(ret)) + ipath_super = ret->d_sb; return ret; } @@ -407,7 +411,7 @@ bail: static struct file_system_type ipathfs_fs_type = { .owner = THIS_MODULE, .name = "ipathfs", - .get_sb = ipathfs_get_sb, + .mount = ipathfs_mount, .kill_sb = ipathfs_kill_super, }; diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig index 4175a4bd0c78..bd995b2b50d8 100644 --- a/drivers/infiniband/hw/mlx4/Kconfig +++ b/drivers/infiniband/hw/mlx4/Kconfig @@ -1,5 +1,6 @@ config MLX4_INFINIBAND tristate "Mellanox ConnectX HCA support" + depends on NETDEVICES && NETDEV_10000 && PCI select MLX4_CORE ---help--- This driver provides low-level InfiniBand support for diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 11a236f8d884..4b8f9c49397e 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -30,66 +30,163 @@ * SOFTWARE. */ +#include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> + #include <linux/slab.h> +#include <linux/inet.h> +#include <linux/string.h> #include "mlx4_ib.h" -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, + u8 *mac, int *is_mcast, u8 port) { - struct mlx4_dev *dev = to_mdev(pd->device)->dev; - struct mlx4_ib_ah *ah; + struct in6_addr in6; - ah = kmalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); + *is_mcast = 0; - memset(&ah->av, 0, sizeof ah->av); + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6); + if (rdma_link_local_addr(&in6)) + rdma_get_ll_mac(&in6, mac); + else if (rdma_is_multicast_addr(&in6)) { + rdma_get_mcast_mac(&in6, mac); + *is_mcast = 1; + } else + return -EINVAL; - ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); - ah->av.g_slid = ah_attr->src_path_bits; - ah->av.dlid = cpu_to_be16(ah_attr->dlid); - if (ah_attr->static_rate) { - ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; - while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && - !(1 << ah->av.stat_rate & dev->caps.stat_rate_support)) - --ah->av.stat_rate; - } - ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + return 0; +} + +static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) +{ + struct mlx4_dev *dev = to_mdev(pd->device)->dev; + + ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.ib.g_slid = ah_attr->src_path_bits; if (ah_attr->ah_flags & IB_AH_GRH) { - ah->av.g_slid |= 0x80; - ah->av.gid_index = ah_attr->grh.sgid_index; - ah->av.hop_limit = ah_attr->grh.hop_limit; - ah->av.sl_tclass_flowlabel |= + ah->av.ib.g_slid |= 0x80; + ah->av.ib.gid_index = ah_attr->grh.sgid_index; + ah->av.ib.hop_limit = ah_attr->grh.hop_limit; + ah->av.ib.sl_tclass_flowlabel |= cpu_to_be32((ah_attr->grh.traffic_class << 20) | ah_attr->grh.flow_label); - memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16); + memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16); + } + + ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid); + if (ah_attr->static_rate) { + ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) + --ah->av.ib.stat_rate; } + ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); return &ah->ibah; } +static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) +{ + struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + struct mlx4_dev *dev = ibdev->dev; + union ib_gid sgid; + u8 mac[6]; + int err; + int is_mcast; + u16 vlan_tag; + + err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num); + if (err) + return ERR_PTR(err); + + memcpy(ah->av.eth.mac, mac, 6); + err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid); + if (err) + return ERR_PTR(err); + vlan_tag = rdma_get_vlan_id(&sgid); + if (vlan_tag < 0x1000) + vlan_tag |= (ah_attr->sl & 7) << 13; + ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.eth.gid_index = ah_attr->grh.sgid_index; + ah->av.eth.vlan = cpu_to_be16(vlan_tag); + if (ah_attr->static_rate) { + ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) + --ah->av.eth.stat_rate; + } + + /* + * HW requires multicast LID so we just choose one. + */ + if (is_mcast) + ah->av.ib.dlid = cpu_to_be16(0xc000); + + memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); + ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + + return &ah->ibah; +} + +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct mlx4_ib_ah *ah; + struct ib_ah *ret; + + ah = kzalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) { + if (!(ah_attr->ah_flags & IB_AH_GRH)) { + ret = ERR_PTR(-EINVAL); + } else { + /* + * TBD: need to handle the case when we get + * called in an atomic context and there we + * might sleep. We don't expect this + * currently since we're working with link + * local addresses which we can translate + * without going to sleep. + */ + ret = create_iboe_ah(pd, ah_attr, ah); + } + + if (IS_ERR(ret)) + kfree(ah); + + return ret; + } else + return create_ib_ah(pd, ah_attr, ah); /* never fails */ +} + int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct mlx4_ib_ah *ah = to_mah(ibah); + enum rdma_link_layer ll; memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->dlid = be16_to_cpu(ah->av.dlid); - ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; - ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24; - if (ah->av.stat_rate) - ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET; - ah_attr->src_path_bits = ah->av.g_slid & 0x7F; + ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24; + ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num); + ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0; + if (ah->av.ib.stat_rate) + ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET; + ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F; if (mlx4_ib_ah_grh_present(ah)) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.traffic_class = - be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20; + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20; ah_attr->grh.flow_label = - be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff; - ah_attr->grh.hop_limit = ah->av.hop_limit; - ah_attr->grh.sgid_index = ah->av.gid_index; - memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16); + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff; + ah_attr->grh.hop_limit = ah->av.ib.hop_limit; + ah_attr->grh.sgid_index = ah->av.ib.gid_index; + memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16); } return 0; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index f38d5b118927..c9a8dd63b9e2 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -311,19 +311,25 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev) struct ib_mad_agent *agent; int p, q; int ret; + enum rdma_link_layer ll; - for (p = 0; p < dev->num_ports; ++p) + for (p = 0; p < dev->num_ports; ++p) { + ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1); for (q = 0; q <= 1; ++q) { - agent = ib_register_mad_agent(&dev->ib_dev, p + 1, - q ? IB_QPT_GSI : IB_QPT_SMI, - NULL, 0, send_handler, - NULL, NULL); - if (IS_ERR(agent)) { - ret = PTR_ERR(agent); - goto err; - } - dev->send_agent[p][q] = agent; + if (ll == IB_LINK_LAYER_INFINIBAND) { + agent = ib_register_mad_agent(&dev->ib_dev, p + 1, + q ? IB_QPT_GSI : IB_QPT_SMI, + NULL, 0, send_handler, + NULL, NULL); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + goto err; + } + dev->send_agent[p][q] = agent; + } else + dev->send_agent[p][q] = NULL; } + } return 0; @@ -344,8 +350,10 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) for (p = 0; p < dev->num_ports; ++p) { for (q = 0; q <= 1; ++q) { agent = dev->send_agent[p][q]; - dev->send_agent[p][q] = NULL; - ib_unregister_mad_agent(agent); + if (agent) { + dev->send_agent[p][q] = NULL; + ib_unregister_mad_agent(agent); + } } if (dev->sm_ah[p]) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4e94e360e43b..bf3e20cd0298 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -35,9 +35,14 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/rtnetlink.h> +#include <linux/if_vlan.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> #include <linux/mlx4/driver.h> #include <linux/mlx4/cmd.h> @@ -58,6 +63,15 @@ static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; +struct update_gid_work { + struct work_struct work; + union ib_gid gids[128]; + struct mlx4_ib_dev *dev; + int port; +}; + +static struct workqueue_struct *wq; + static void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -66,6 +80,8 @@ static void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } +static union ib_gid zgid; + static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -135,7 +151,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; - props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64); + props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES; props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; @@ -154,28 +170,19 @@ out: return err; } -static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props) +static enum rdma_link_layer +mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; - - in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; - - memset(props, 0, sizeof *props); - - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); + struct mlx4_dev *dev = to_mdev(device)->dev; - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); - if (err) - goto out; + return dev->caps.port_mask & (1 << (port_num - 1)) ? + IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; +} +static int ib_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, + struct ib_smp *out_mad) +{ props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); @@ -196,6 +203,80 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; + return 0; +} + +static u8 state_to_phys_state(enum ib_port_state state) +{ + return state == IB_PORT_ACTIVE ? 5 : 3; +} + +static int eth_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, + struct ib_smp *out_mad) +{ + struct mlx4_ib_iboe *iboe = &to_mdev(ibdev)->iboe; + struct net_device *ndev; + enum ib_mtu tmp; + + props->active_width = IB_WIDTH_4X; + props->active_speed = 4; + props->port_cap_flags = IB_PORT_CM_SUP; + props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; + props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; + props->pkey_tbl_len = 1; + props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); + props->max_mtu = IB_MTU_2048; + props->subnet_timeout = 0; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = 0; + props->state = IB_PORT_DOWN; + props->phys_state = state_to_phys_state(props->state); + props->active_mtu = IB_MTU_256; + spin_lock(&iboe->lock); + ndev = iboe->netdevs[port - 1]; + if (!ndev) + goto out; + + tmp = iboe_get_mtu(ndev->mtu); + props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; + + props->state = netif_running(ndev) && netif_oper_up(ndev) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + props->phys_state = state_to_phys_state(props->state); + +out: + spin_unlock(&iboe->lock); + return 0; +} + +static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(props, 0, sizeof *props); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? + ib_link_query_port(ibdev, port, props, out_mad) : + eth_link_query_port(ibdev, port, props, out_mad); + out: kfree(in_mad); kfree(out_mad); @@ -203,8 +284,8 @@ out: return err; } -static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid) +static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; @@ -241,6 +322,25 @@ out: return err; } +static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + + *gid = dev->iboe.gid_table[port - 1][index]; + + return 0; +} + +static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) + return __mlx4_ib_query_gid(ibdev, port, index, gid); + else + return iboe_query_gid(ibdev, port, index, gid); +} + static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { @@ -272,14 +372,32 @@ out: static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { + struct mlx4_cmd_mailbox *mailbox; + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) return -EOPNOTSUPP; - if (mask & IB_DEVICE_MODIFY_NODE_DESC) { - spin_lock(&to_mdev(ibdev)->sm_lock); - memcpy(ibdev->node_desc, props->node_desc, 64); - spin_unlock(&to_mdev(ibdev)->sm_lock); - } + if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) + return 0; + + spin_lock(&to_mdev(ibdev)->sm_lock); + memcpy(ibdev->node_desc, props->node_desc, 64); + spin_unlock(&to_mdev(ibdev)->sm_lock); + + /* + * If possible, pass node desc to FW, so it can generate + * a 144 trap. If cmd fails, just ignore. + */ + mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev); + if (IS_ERR(mailbox)) + return 0; + + memset(mailbox->buf, 0, 256); + memcpy(mailbox->buf, props->node_desc, 64); + mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, + MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A); + + mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); return 0; } @@ -289,6 +407,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, { struct mlx4_cmd_mailbox *mailbox; int err; + u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; mailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(mailbox)) @@ -304,7 +423,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); } - err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, + err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B); mlx4_free_cmd_mailbox(dev->dev, mailbox); @@ -447,18 +566,132 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd) return 0; } +static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) +{ + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_gid_entry *ge; + + ge = kzalloc(sizeof *ge, GFP_KERNEL); + if (!ge) + return -ENOMEM; + + ge->gid = *gid; + if (mlx4_ib_add_mc(mdev, mqp, gid)) { + ge->port = mqp->port; + ge->added = 1; + } + + mutex_lock(&mqp->mutex); + list_add_tail(&ge->list, &mqp->gid_list); + mutex_unlock(&mqp->mutex); + + return 0; +} + +int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + union ib_gid *gid) +{ + u8 mac[6]; + struct net_device *ndev; + int ret = 0; + + if (!mqp->port) + return 0; + + spin_lock(&mdev->iboe.lock); + ndev = mdev->iboe.netdevs[mqp->port - 1]; + if (ndev) + dev_hold(ndev); + spin_unlock(&mdev->iboe.lock); + + if (ndev) { + rdma_get_mcast_mac((struct in6_addr *)gid, mac); + rtnl_lock(); + dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac); + ret = 1; + rtnl_unlock(); + dev_put(ndev); + } + + return ret; +} + static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return mlx4_multicast_attach(to_mdev(ibqp->device)->dev, - &to_mqp(ibqp)->mqp, gid->raw, - !!(to_mqp(ibqp)->flags & - MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); + int err; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, !!(mqp->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); + if (err) + return err; + + err = add_gid_entry(ibqp, gid); + if (err) + goto err_add; + + return 0; + +err_add: + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw); + return err; +} + +static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw) +{ + struct mlx4_ib_gid_entry *ge; + struct mlx4_ib_gid_entry *tmp; + struct mlx4_ib_gid_entry *ret = NULL; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + if (!memcmp(raw, ge->gid.raw, 16)) { + ret = ge; + break; + } + } + + return ret; } static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return mlx4_multicast_detach(to_mdev(ibqp->device)->dev, - &to_mqp(ibqp)->mqp, gid->raw); + int err; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + u8 mac[6]; + struct net_device *ndev; + struct mlx4_ib_gid_entry *ge; + + err = mlx4_multicast_detach(mdev->dev, + &mqp->mqp, gid->raw); + if (err) + return err; + + mutex_lock(&mqp->mutex); + ge = find_gid_entry(mqp, gid->raw); + if (ge) { + spin_lock(&mdev->iboe.lock); + ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL; + if (ndev) + dev_hold(ndev); + spin_unlock(&mdev->iboe.lock); + rdma_get_mcast_mac((struct in6_addr *)gid, mac); + if (ndev) { + rtnl_lock(); + dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac); + rtnl_unlock(); + dev_put(ndev); + } + list_del(&ge->list); + kfree(ge); + } else + printk(KERN_WARNING "could not find mgid entry\n"); + + mutex_unlock(&mqp->mutex); + + return 0; } static int init_node_data(struct mlx4_ib_dev *dev) @@ -543,15 +776,215 @@ static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_board_id }; +static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev) +{ + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + if (vlan_id < 0x1000) { + eui[3] = vlan_id >> 8; + eui[4] = vlan_id & 0xff; + } else { + eui[3] = 0xff; + eui[4] = 0xfe; + } + eui[0] ^= 2; +} + +static void update_gids_task(struct work_struct *work) +{ + struct update_gid_work *gw = container_of(work, struct update_gid_work, work); + struct mlx4_cmd_mailbox *mailbox; + union ib_gid *gids; + int err; + struct mlx4_dev *dev = gw->dev->dev; + struct ib_event event; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + printk(KERN_WARNING "update gid table failed %ld\n", PTR_ERR(mailbox)); + return; + } + + gids = mailbox->buf; + memcpy(gids, gw->gids, sizeof gw->gids); + + err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B); + if (err) + printk(KERN_WARNING "set port command failed\n"); + else { + memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); + event.device = &gw->dev->ib_dev; + event.element.port_num = gw->port; + event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } + + mlx4_free_cmd_mailbox(dev, mailbox); + kfree(gw); +} + +static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear) +{ + struct net_device *ndev = dev->iboe.netdevs[port - 1]; + struct update_gid_work *work; + struct net_device *tmp; + int i; + u8 *hits; + int ret; + union ib_gid gid; + int free; + int found; + int need_update = 0; + u16 vid; + + work = kzalloc(sizeof *work, GFP_ATOMIC); + if (!work) + return -ENOMEM; + + hits = kzalloc(128, GFP_ATOMIC); + if (!hits) { + ret = -ENOMEM; + goto out; + } + + read_lock(&dev_base_lock); + for_each_netdev(&init_net, tmp) { + if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) { + gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + vid = rdma_vlan_dev_vlan_id(tmp); + mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev); + found = 0; + free = -1; + for (i = 0; i < 128; ++i) { + if (free < 0 && + !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) + free = i; + if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) { + hits[i] = 1; + found = 1; + break; + } + } + + if (!found) { + if (tmp == ndev && + (memcmp(&dev->iboe.gid_table[port - 1][0], + &gid, sizeof gid) || + !memcmp(&dev->iboe.gid_table[port - 1][0], + &zgid, sizeof gid))) { + dev->iboe.gid_table[port - 1][0] = gid; + ++need_update; + hits[0] = 1; + } else if (free >= 0) { + dev->iboe.gid_table[port - 1][free] = gid; + hits[free] = 1; + ++need_update; + } + } + } + } + read_unlock(&dev_base_lock); + + for (i = 0; i < 128; ++i) + if (!hits[i]) { + if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) + ++need_update; + dev->iboe.gid_table[port - 1][i] = zgid; + } + + if (need_update) { + memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids); + INIT_WORK(&work->work, update_gids_task); + work->port = port; + work->dev = dev; + queue_work(wq, &work->work); + } else + kfree(work); + + kfree(hits); + return 0; + +out: + kfree(work); + return ret; +} + +static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event) +{ + switch (event) { + case NETDEV_UP: + case NETDEV_CHANGEADDR: + update_ipv6_gids(dev, port, 0); + break; + + case NETDEV_DOWN: + update_ipv6_gids(dev, port, 1); + dev->iboe.netdevs[port - 1] = NULL; + } +} + +static void netdev_added(struct mlx4_ib_dev *dev, int port) +{ + update_ipv6_gids(dev, port, 0); +} + +static void netdev_removed(struct mlx4_ib_dev *dev, int port) +{ + update_ipv6_gids(dev, port, 1); +} + +static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct mlx4_ib_dev *ibdev; + struct net_device *oldnd; + struct mlx4_ib_iboe *iboe; + int port; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); + iboe = &ibdev->iboe; + + spin_lock(&iboe->lock); + mlx4_foreach_ib_transport_port(port, ibdev->dev) { + oldnd = iboe->netdevs[port - 1]; + iboe->netdevs[port - 1] = + mlx4_get_protocol_dev(ibdev->dev, MLX4_PROTOCOL_EN, port); + if (oldnd != iboe->netdevs[port - 1]) { + if (iboe->netdevs[port - 1]) + netdev_added(ibdev, port); + else + netdev_removed(ibdev, port); + } + } + + if (dev == iboe->netdevs[0] || + (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0])) + handle_en_event(ibdev, 1, event); + else if (dev == iboe->netdevs[1] + || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1])) + handle_en_event(ibdev, 2, event); + + spin_unlock(&iboe->lock); + + return NOTIFY_DONE; +} + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; int num_ports = 0; int i; + int err; + struct mlx4_ib_iboe *iboe; printk_once(KERN_INFO "%s", mlx4_ib_version); - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + mlx4_foreach_ib_transport_port(i, dev) num_ports++; /* No point in registering a device with no ports... */ @@ -564,6 +997,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) return NULL; } + iboe = &ibdev->iboe; + if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) goto err_dealloc; @@ -612,6 +1047,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.query_device = mlx4_ib_query_device; ibdev->ib_dev.query_port = mlx4_ib_query_port; + ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; ibdev->ib_dev.query_gid = mlx4_ib_query_gid; ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; ibdev->ib_dev.modify_device = mlx4_ib_modify_device; @@ -656,6 +1092,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + spin_lock_init(&iboe->lock); + if (init_node_data(ibdev)) goto err_map; @@ -668,16 +1106,28 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_mad_init(ibdev)) goto err_reg; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { + iboe->nb.notifier_call = mlx4_ib_netdev_event; + err = register_netdevice_notifier(&iboe->nb); + if (err) + goto err_reg; + } + for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) { if (device_create_file(&ibdev->ib_dev.dev, mlx4_class_attributes[i])) - goto err_reg; + goto err_notif; } ibdev->ib_active = true; return ibdev; +err_notif: + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) + printk(KERN_WARNING "failure unregistering notifier\n"); + flush_workqueue(wq); + err_reg: ib_unregister_device(&ibdev->ib_dev); @@ -703,11 +1153,16 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) mlx4_ib_mad_cleanup(ibdev); ib_unregister_device(&ibdev->ib_dev); + if (ibdev->iboe.nb.notifier_call) { + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) + printk(KERN_WARNING "failure unregistering notifier\n"); + ibdev->iboe.nb.notifier_call = NULL; + } + iounmap(ibdev->uar_map); - for (p = 1; p <= ibdev->num_ports; ++p) + mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); - iounmap(ibdev->uar_map); mlx4_uar_free(dev, &ibdev->priv_uar); mlx4_pd_free(dev, ibdev->priv_pdn); ib_dealloc_device(&ibdev->ib_dev); @@ -747,19 +1202,33 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, } static struct mlx4_interface mlx4_ib_interface = { - .add = mlx4_ib_add, - .remove = mlx4_ib_remove, - .event = mlx4_ib_event + .add = mlx4_ib_add, + .remove = mlx4_ib_remove, + .event = mlx4_ib_event, + .protocol = MLX4_PROTOCOL_IB }; static int __init mlx4_ib_init(void) { - return mlx4_register_interface(&mlx4_ib_interface); + int err; + + wq = create_singlethread_workqueue("mlx4_ib"); + if (!wq) + return -ENOMEM; + + err = mlx4_register_interface(&mlx4_ib_interface); + if (err) { + destroy_workqueue(wq); + return err; + } + + return 0; } static void __exit mlx4_ib_cleanup(void) { mlx4_unregister_interface(&mlx4_ib_interface); + destroy_workqueue(wq); } module_init(mlx4_ib_init); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 3486d7675e56..2a322f21049f 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -112,6 +112,13 @@ enum mlx4_ib_qp_flags { MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, }; +struct mlx4_ib_gid_entry { + struct list_head list; + union ib_gid gid; + int added; + u8 port; +}; + struct mlx4_ib_qp { struct ib_qp ibqp; struct mlx4_qp mqp; @@ -138,6 +145,8 @@ struct mlx4_ib_qp { u8 resp_depth; u8 sq_no_prefetch; u8 state; + int mlx_type; + struct list_head gid_list; }; struct mlx4_ib_srq { @@ -157,7 +166,14 @@ struct mlx4_ib_srq { struct mlx4_ib_ah { struct ib_ah ibah; - struct mlx4_av av; + union mlx4_ext_av av; +}; + +struct mlx4_ib_iboe { + spinlock_t lock; + struct net_device *netdevs[MLX4_MAX_PORTS]; + struct notifier_block nb; + union ib_gid gid_table[MLX4_MAX_PORTS][128]; }; struct mlx4_ib_dev { @@ -176,6 +192,7 @@ struct mlx4_ib_dev { struct mutex cap_mask_mutex; bool ib_active; + struct mlx4_ib_iboe iboe; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) @@ -314,9 +331,20 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, int mlx4_ib_unmap_fmr(struct list_head *fmr_list); int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); +int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, + u8 *mac, int *is_mcast, u8 port); + static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) { - return !!(ah->av.g_slid & 0x80); + u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3; + + if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET) + return 1; + + return !!(ah->av.ib.g_slid & 0x80); } +int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + union ib_gid *gid); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 1d27b9a8e2d6..dca55b19a6f1 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -226,7 +226,7 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device struct mlx4_ib_fast_reg_page_list *mfrpl; int size = page_list_len * sizeof (u64); - if (size > PAGE_SIZE) + if (page_list_len > MLX4_MAX_FAST_REG_PAGES) return ERR_PTR(-EINVAL); mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 6a60827b2301..9a7794ac34c1 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -33,9 +33,11 @@ #include <linux/log2.h> #include <linux/slab.h> +#include <linux/netdevice.h> #include <rdma/ib_cache.h> #include <rdma/ib_pack.h> +#include <rdma/ib_addr.h> #include <linux/mlx4/qp.h> @@ -48,17 +50,26 @@ enum { enum { MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, - MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f + MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, + MLX4_IB_LINK_TYPE_IB = 0, + MLX4_IB_LINK_TYPE_ETH = 1 }; enum { /* - * Largest possible UD header: send with GRH and immediate data. + * Largest possible UD header: send with GRH and immediate + * data plus 18 bytes for an Ethernet header with VLAN/802.1Q + * tag. (LRH would only use 8 bytes, so Ethernet is the + * biggest case) */ - MLX4_IB_UD_HEADER_SIZE = 72, + MLX4_IB_UD_HEADER_SIZE = 82, MLX4_IB_LSO_HEADER_SPARE = 128, }; +enum { + MLX4_IB_IBOE_ETHERTYPE = 0x8915 +}; + struct mlx4_ib_sqp { struct mlx4_ib_qp qp; int pkey_index; @@ -462,6 +473,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + INIT_LIST_HEAD(&qp->gid_list); qp->state = IB_QPS_RESET; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) @@ -649,6 +661,16 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re } } +static void del_gid_entries(struct mlx4_ib_qp *qp) +{ + struct mlx4_ib_gid_entry *ge, *tmp; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + list_del(&ge->list); + kfree(ge); + } +} + static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, int is_user) { @@ -695,6 +717,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, if (!qp->ibqp.srq) mlx4_db_free(dev->dev, &qp->db); } + + del_gid_entries(qp); } struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, @@ -852,6 +876,14 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx4_qp_path *path, u8 port) { + int err; + int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET; + u8 mac[6]; + int is_mcast; + u16 vlan_tag; + int vidx; + path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); if (ah->static_rate) { @@ -879,12 +911,49 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, memcpy(path->rgid, ah->grh.dgid.raw, 16); } - path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | - ((port - 1) << 6) | ((ah->sl & 0xf) << 2); + if (is_eth) { + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 7) << 3) | ((ah->sl & 8) >> 1); + + if (!(ah->ah_flags & IB_AH_GRH)) + return -1; + + err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port); + if (err) + return err; + + memcpy(path->dmac, mac, 6); + path->ackto = MLX4_IB_LINK_TYPE_ETH; + /* use index 0 into MAC table for IBoE */ + path->grh_mylmc &= 0x80; + + vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]); + if (vlan_tag < 0x1000) { + if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx)) + return -ENOENT; + + path->vlan_index = vidx; + path->fl = 1 << 6; + } + } else + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 0xf) << 2); return 0; } +static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + struct mlx4_ib_gid_entry *ge, *tmp; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) { + ge->added = 1; + ge->port = qp->port; + } + } +} + static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -980,7 +1049,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_TIMEOUT) { - context->pri_path.ackto = attr->timeout << 3; + context->pri_path.ackto |= attr->timeout << 3; optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; } @@ -1118,8 +1187,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, qp->atomic_rd_en = attr->qp_access_flags; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) qp->resp_depth = attr->max_dest_rd_atomic; - if (attr_mask & IB_QP_PORT) + if (attr_mask & IB_QP_PORT) { qp->port = attr->port_num; + update_mcg_macs(dev, qp); + } if (attr_mask & IB_QP_ALT_PATH) qp->alt_port = attr->alt_port_num; @@ -1221,40 +1292,59 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + union ib_gid sgid; u16 pkey; int send_size; int header_size; int spc; int i; + int is_eth; + int is_vlan = 0; + int is_grh; + u16 vlan; send_size = 0; for (i = 0; i < wr->num_sge; ++i) send_size += wr->sg_list[i].length; - ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), 0, &sqp->ud_header); + is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; + is_grh = mlx4_ib_ah_grh_present(ah); + if (is_eth) { + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid); + vlan = rdma_get_vlan_id(&sgid); + is_vlan = vlan < 0x1000; + } + ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); + + if (!is_eth) { + sqp->ud_header.lrh.service_level = + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid; + sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); + } - sqp->ud_header.lrh.service_level = - be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; - sqp->ud_header.lrh.destination_lid = ah->av.dlid; - sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f); - if (mlx4_ib_ah_grh_present(ah)) { + if (is_grh) { sqp->ud_header.grh.traffic_class = - (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; + (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; sqp->ud_header.grh.flow_label = - ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); - sqp->ud_header.grh.hop_limit = ah->av.hop_limit; - ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, - ah->av.gid_index, &sqp->ud_header.grh.source_gid); + ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); + sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); memcpy(sqp->ud_header.grh.destination_gid.raw, - ah->av.dgid, 16); + ah->av.ib.dgid, 16); } mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | - (sqp->ud_header.lrh.destination_lid == - IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | - (sqp->ud_header.lrh.service_level << 8)); - mlx->rlid = sqp->ud_header.lrh.destination_lid; + + if (!is_eth) { + mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | + (sqp->ud_header.lrh.service_level << 8)); + mlx->rlid = sqp->ud_header.lrh.destination_lid; + } switch (wr->opcode) { case IB_WR_SEND: @@ -1270,9 +1360,29 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, return -EINVAL; } - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; - if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) - sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + if (is_eth) { + u8 *smac; + + memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); + /* FIXME: cache smac value? */ + smac = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]->dev_addr; + memcpy(sqp->ud_header.eth.smac_h, smac, 6); + if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) + mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); + if (!is_vlan) { + sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + } else { + u16 pcp; + + sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 27 & 3) << 13; + sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); + } + } else { + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) + sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + } sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); @@ -1429,11 +1539,14 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr) + struct ib_send_wr *wr, __be16 *vlan) { memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; + memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); + *vlan = dseg->vlan; } static void set_mlx_icrc_seg(void *dseg) @@ -1536,6 +1649,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, __be32 uninitialized_var(lso_hdr_sz); __be32 blh; int i; + __be16 vlan = cpu_to_be16(0xffff); spin_lock_irqsave(&qp->sq.lock, flags); @@ -1639,7 +1753,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_UD: - set_datagram_seg(wqe, wr); + set_datagram_seg(wqe, wr, &vlan); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; @@ -1717,6 +1831,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; + if (be16_to_cpu(vlan) < 0x1000) { + ctrl->ins_vlan = 1 << 6; + ctrl->vlan_tag = vlan; + } + stamp = ind + qp->sq_spare_wqes; ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); @@ -1866,17 +1985,27 @@ static int to_ib_qp_access_flags(int mlx4_flags) return ib_flags; } -static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, +static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) { + struct mlx4_dev *dev = ibdev->dev; + int is_eth; + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) return; + is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) == + IB_LINK_LAYER_ETHERNET; + if (is_eth) + ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) | + ((path->sched_queue & 4) << 1); + else + ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; + ib_ah_attr->dlid = be16_to_cpu(path->rlid); - ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0; @@ -1929,8 +2058,8 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr to_ib_qp_access_flags(be32_to_cpu(context.params2)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); - to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path); + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 3603ae89b606..f4ceecd9684b 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1817,7 +1817,7 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, case IB_QPT_RAW_IPV6: op_mod = 2; break; - case IB_QPT_RAW_ETY: + case IB_QPT_RAW_ETHERTYPE: op_mod = 3; break; default: diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index d2d172e6289c..a34c9d38e822 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1493,7 +1493,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, int err; u16 pkey; - ib_ud_header_init(256, /* assume a MAD */ + ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0, mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0, &sqp->ud_header); diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index de7b9d7166f3..0c9f0aa5d4ea 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -110,8 +110,8 @@ static unsigned int sysfs_nonidx_addr; static unsigned int sysfs_idx_addr; static struct pci_device_id nes_pci_table[] = { - {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID}, - {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR, PCI_ANY_ID, PCI_ANY_ID}, + { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020), }, + { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR), }, {0} }; @@ -259,13 +259,11 @@ static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_r unsigned long flags; struct nes_qp *nesqp = cqp_request->cqp_callback_pointer; struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 qp_id; atomic_inc(&qps_destroyed); /* Free the control structures */ - qp_id = nesqp->hwqp.qp_id; if (nesqp->pbl_vbase) { pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); @@ -441,7 +439,6 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i struct net_device *netdev = NULL; struct nes_device *nesdev = NULL; int ret = 0; - struct nes_vnic *nesvnic = NULL; void __iomem *mmio_regs = NULL; u8 hw_rev; @@ -664,25 +661,21 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i nes_notifiers_registered++; /* Initialize network devices */ - if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL) { - goto bail7; - } - - /* Register network device */ - ret = register_netdev(netdev); - if (ret) { - printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret); - nes_netdev_destroy(netdev); - goto bail7; - } + if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL) + goto bail7; - nes_print_macaddr(netdev); - /* create a CM core for this netdev */ - nesvnic = netdev_priv(netdev); + /* Register network device */ + ret = register_netdev(netdev); + if (ret) { + printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret); + nes_netdev_destroy(netdev); + goto bail7; + } - nesdev->netdev_count++; - nesdev->nesadapter->netdev_count++; + nes_print_macaddr(netdev); + nesdev->netdev_count++; + nesdev->nesadapter->netdev_count++; printk(KERN_ERR PFX "%s: NetEffect RNIC driver successfully loaded.\n", pci_name(pcidev)); @@ -1104,7 +1097,7 @@ static ssize_t nes_show_wqm_quanta(struct device_driver *ddp, char *buf) i++; } - return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta); + return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta_value); } diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index cc78fee1dd51..b3d145e82b4c 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -262,6 +262,7 @@ struct nes_device { u16 base_doorbell_index; u16 currcq_count; u16 deepcq_count; + u8 iw_status; u8 msi_enabled; u8 netdev_count; u8 napi_isr_ran; @@ -527,6 +528,7 @@ void nes_cm_disconn_worker(void *); int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32, u32); int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *); struct nes_ib_device *nes_init_ofa_device(struct net_device *); +void nes_port_ibevent(struct nes_vnic *nesvnic); void nes_destroy_ofa_device(struct nes_ib_device *); int nes_register_ofa_device(struct nes_ib_device *); diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 986d6f32dded..25ad0f9944c0 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -502,7 +502,9 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, static void nes_retrans_expired(struct nes_cm_node *cm_node) { struct iw_cm_id *cm_id = cm_node->cm_id; - switch (cm_node->state) { + enum nes_cm_node_state state = cm_node->state; + cm_node->state = NES_CM_STATE_CLOSED; + switch (state) { case NES_CM_STATE_SYN_RCVD: case NES_CM_STATE_CLOSING: rem_ref_cm_node(cm_node->cm_core, cm_node); @@ -511,7 +513,6 @@ static void nes_retrans_expired(struct nes_cm_node *cm_node) case NES_CM_STATE_FIN_WAIT1: if (cm_node->cm_id) cm_id->rem_ref(cm_id); - cm_node->state = NES_CM_STATE_CLOSED; send_reset(cm_node, NULL); break; default: @@ -1146,7 +1147,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi } if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID))) - neigh_event_send(rt->u.dst.neighbour, NULL); + neigh_event_send(rt->dst.neighbour, NULL); ip_rt_put(rt); return rc; @@ -1423,7 +1424,6 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, { int reset = 0; /* whether to send reset in case of err.. */ - int passive_state; atomic_inc(&cm_resets_recvd); nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u." " refcnt=%d\n", cm_node, cm_node->state, @@ -1438,10 +1438,7 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, active_open_err(cm_node, skb, reset); break; case NES_CM_STATE_MPAREQ_RCVD: - passive_state = atomic_add_return(1, &cm_node->passive_state); - if (passive_state == NES_SEND_RESET_EVENT) - create_event(cm_node, NES_CM_EVENT_RESET); - cm_node->state = NES_CM_STATE_CLOSED; + atomic_inc(&cm_node->passive_state); dev_kfree_skb_any(skb); break; case NES_CM_STATE_ESTABLISHED: @@ -1456,6 +1453,7 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, case NES_CM_STATE_CLOSED: drop_packet(skb); break; + case NES_CM_STATE_FIN_WAIT2: case NES_CM_STATE_FIN_WAIT1: case NES_CM_STATE_LAST_ACK: cm_node->cm_id->rem_ref(cm_node->cm_id); @@ -1719,8 +1717,6 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, { int datasize = 0; u32 inc_sequence; - u32 rem_seq_ack; - u32 rem_seq; int ret = 0; int optionsize; optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); @@ -1730,8 +1726,6 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, skb_pull(skb, tcph->doff << 2); inc_sequence = ntohl(tcph->seq); - rem_seq = ntohl(tcph->seq); - rem_seq_ack = ntohl(tcph->ack_seq); datasize = skb->len; switch (cm_node->state) { case NES_CM_STATE_SYN_RCVD: @@ -2565,7 +2559,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) u16 last_ae; u8 original_hw_tcp_state; u8 original_ibqp_state; - enum iw_cm_event_type disconn_status = IW_CM_EVENT_STATUS_OK; + enum iw_cm_event_status disconn_status = IW_CM_EVENT_STATUS_OK; int issue_disconn = 0; int issue_close = 0; int issue_flush = 0; @@ -2706,7 +2700,7 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) nesibdev = nesvnic->nesibdev; nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", - atomic_read(&nesvnic->netdev->refcnt)); + netdev_refcnt_read(nesvnic->netdev)); if (nesqp->active_conn) { @@ -2781,6 +2775,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -EINVAL; } + passive_state = atomic_add_return(1, &cm_node->passive_state); + if (passive_state == NES_SEND_RESET_EVENT) { + rem_ref_cm_node(cm_node->cm_core, cm_node); + return -ECONNRESET; + } + /* associate the node with the QP */ nesqp->cm_node = (void *)cm_node; cm_node->nesqp = nesqp; @@ -2790,7 +2790,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) atomic_inc(&cm_accepts); nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", - atomic_read(&nesvnic->netdev->refcnt)); + netdev_refcnt_read(nesvnic->netdev)); /* allocate the ietf frame and space for private data */ nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, @@ -2983,9 +2983,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " "ret=%d\n", __func__, __LINE__, ret); - passive_state = atomic_add_return(1, &cm_node->passive_state); - if (passive_state == NES_SEND_RESET_EVENT) - create_event(cm_node, NES_CM_EVENT_RESET); return 0; } @@ -3128,17 +3125,15 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) struct nes_vnic *nesvnic; struct nes_cm_listener *cm_node; struct nes_cm_info cm_info; - struct nes_adapter *adapter; int err; - nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n", cm_id, ntohs(cm_id->local_addr.sin_port)); nesvnic = to_nesvnic(cm_id->device); if (!nesvnic) return -EINVAL; - adapter = nesvnic->nesdev->nesadapter; + nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", nesvnic, nesvnic->netdev, nesvnic->netdev->name); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 57874a165083..1980a461c499 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1970,7 +1970,7 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) dev_kfree_skb( nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]); - nesvnic->nic.sq_tail = (++nesvnic->nic.sq_tail) + nesvnic->nic.sq_tail = (nesvnic->nic.sq_tail + 1) & (nesvnic->nic.sq_size - 1); } @@ -2737,9 +2737,9 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) nesnic->sq_tail &= nesnic->sq_size-1; if (sq_cqes > 128) { barrier(); - /* restart the queue if it had been stopped */ - if (netif_queue_stopped(nesvnic->netdev)) - netif_wake_queue(nesvnic->netdev); + /* restart the queue if it had been stopped */ + if (netif_queue_stopped(nesvnic->netdev)) + netif_wake_queue(nesvnic->netdev); sq_cqes = 0; } } else { @@ -2999,11 +2999,8 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) static u8 *locate_mpa(u8 *pkt, u32 aeq_info) { - u16 pkt_len; - if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) { /* skip over ethernet header */ - pkt_len = be16_to_cpu(*(u16 *)(pkt + ETH_HLEN - 2)); pkt += ETH_HLEN; /* Skip over IP and TCP headers */ @@ -3283,9 +3280,15 @@ static void nes_terminate_connection(struct nes_device *nesdev, struct nes_qp *n else mod_qp_flags |= NES_CQP_QP_TERM_DONT_SEND_TERM_MSG; - nes_terminate_start_timer(nesqp); - nesqp->term_flags |= NES_TERM_SENT; - nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0); + if (!nesdev->iw_status) { + nesqp->term_flags = NES_TERM_DONE; + nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_ERROR, 0, 0); + nes_cm_disconn(nesqp); + } else { + nes_terminate_start_timer(nesqp); + nesqp->term_flags |= NES_TERM_SENT; + nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0); + } } static void nes_terminate_send_fin(struct nes_device *nesdev, @@ -3465,6 +3468,19 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, return; /* Ignore it, wait for close complete */ if (atomic_inc_return(&nesqp->close_timer_started) == 1) { + if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) && + (nesqp->ibqp_state == IB_QPS_RTS) && + ((nesadapter->eeprom_version >> 16) != NES_A0)) { + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); + nes_cm_disconn(nesqp); + } nesqp->cm_id->add_ref(nesqp->cm_id); schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp, NES_TIMER_TYPE_CLOSE, 1, 0); @@ -3474,7 +3490,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), async_event_id, nesqp->last_aeq, tcp_state); } - break; case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: if (nesqp->term_flags) { diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index bbbfe9fc5a5a..1204c3432b63 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -45,6 +45,7 @@ #define NES_PHY_TYPE_KR 9 #define NES_MULTICAST_PF_MAX 8 +#define NES_A0 3 enum pci_regs { NES_INT_STAT = 0x0000, @@ -1100,11 +1101,12 @@ struct nes_adapter { u32 wqm_wat; u32 core_clock; u32 firmware_version; + u32 eeprom_version; u32 nic_rx_eth_route_err; u32 et_rx_coalesce_usecs; - u32 et_rx_max_coalesced_frames; + u32 et_rx_max_coalesced_frames; u32 et_rx_coalesce_usecs_irq; u32 et_rx_max_coalesced_frames_irq; u32 et_pkt_rate_low; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 5cc0a9ae5bb1..3892e2c0e95a 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -232,6 +232,13 @@ static int nes_netdev_open(struct net_device *netdev) NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); first_nesvnic = nesvnic; } + + if (nesvnic->of_device_registered) { + nesdev->iw_status = 1; + nesdev->nesadapter->send_term_ok = 1; + nes_port_ibevent(nesvnic); + } + if (first_nesvnic->linkup) { /* Enable network packets */ nesvnic->linkup = 1; @@ -264,6 +271,7 @@ static int nes_netdev_stop(struct net_device *netdev) if (netif_msg_ifdown(nesvnic)) printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name); + netif_carrier_off(netdev); /* Disable network packets */ napi_disable(&nesvnic->napi); @@ -309,9 +317,9 @@ static int nes_netdev_stop(struct net_device *netdev) if (nesvnic->of_device_registered) { - nes_destroy_ofa_device(nesvnic->nesibdev); - nesvnic->nesibdev = NULL; - nesvnic->of_device_registered = 0; + nesdev->nesadapter->send_term_ok = 0; + nesdev->iw_status = 0; + nes_port_ibevent(nesvnic); } nes_destroy_nic_qp(nesvnic); @@ -463,7 +471,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) u16 nhoffset; u16 wqes_needed; u16 wqes_available; - u32 old_head; u32 wqe_misc; /* @@ -503,7 +510,6 @@ sq_no_longer_full: if (skb_is_gso(skb)) { nesvnic->segmented_tso_requests++; nesvnic->tso_requests++; - old_head = nesnic->sq_head; /* Basically 4 fragments available per WQE with extended fragments */ wqes_needed = nr_frags >> 2; wqes_needed += (nr_frags&3)?1:0; @@ -1441,14 +1447,14 @@ static int nes_netdev_set_pauseparam(struct net_device *netdev, NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; nes_write_indexed(nesdev, - NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp); + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp); nesdev->disable_tx_flow_control = 0; } else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) { u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; nes_write_indexed(nesdev, - NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp); + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp); nesdev->disable_tx_flow_control = 1; } if ((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) { @@ -1567,6 +1573,12 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd } +static int nes_netdev_set_flags(struct net_device *netdev, u32 flags) +{ + return ethtool_op_set_flags(netdev, flags, ETH_FLAG_LRO); +} + + static const struct ethtool_ops nes_ethtool_ops = { .get_link = ethtool_op_get_link, .get_settings = nes_netdev_get_settings, @@ -1588,7 +1600,7 @@ static const struct ethtool_ops nes_ethtool_ops = { .get_tso = ethtool_op_get_tso, .set_tso = ethtool_op_set_tso, .get_flags = ethtool_op_get_flags, - .set_flags = ethtool_op_set_flags, + .set_flags = nes_netdev_set_flags, }; diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index a9f5dd272f1a..f9c417c6b3b3 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -190,6 +190,11 @@ int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesada nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) + (u32)((u8)eeprom_data); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 10); + printk(PFX "EEPROM version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data); + nesadapter->eeprom_version = (((u32)(u8)(eeprom_data>>8)) << 16) + + (u32)((u8)eeprom_data); + no_fw_rev: /* eeprom is valid */ eeprom_offset = nesadapter->software_eeprom_offset; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 9bc2d744b2ea..99933e4e48ff 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -476,9 +476,9 @@ static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list( } nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, " "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, " - "pbl.paddr= %p\n", pnesfrpl, &pnesfrpl->ibfrpl, + "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl, pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva, - (void *)pnesfrpl->nes_wqe_pbl.paddr); + (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr); return pifrpl; } @@ -518,7 +518,7 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop memset(props, 0, sizeof(*props)); memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6); - props->fw_ver = nesdev->nesadapter->fw_ver; + props->fw_ver = nesdev->nesadapter->firmware_version; props->device_cap_flags = nesdev->nesadapter->device_cap_flags; props->vendor_id = nesdev->nesadapter->vendor_id; props->vendor_part_id = nesdev->nesadapter->vendor_part_id; @@ -584,7 +584,9 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr props->lmc = 0; props->sm_lid = 0; props->sm_sl = 0; - if (nesvnic->linkup) + if (netif_queue_stopped(netdev)) + props->state = IB_PORT_DOWN; + else if (nesvnic->linkup) props->state = IB_PORT_ACTIVE; else props->state = IB_PORT_DOWN; @@ -785,7 +787,7 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n", nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context, - atomic_read(&nesvnic->netdev->refcnt)); + netdev_refcnt_read(nesvnic->netdev)); err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, nesadapter->max_pd, &pd_num, &nesadapter->next_pd); @@ -1416,7 +1418,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, /* update the QP table */ nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp; nes_debug(NES_DBG_QP, "netdev refcnt=%u\n", - atomic_read(&nesvnic->netdev->refcnt)); + netdev_refcnt_read(nesvnic->netdev)); return &nesqp->ibqp; } @@ -1941,7 +1943,7 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, u8 use_256_pbls = 0; u8 use_4k_pbls = 0; u16 use_two_level = (pbl_count_4k > 1) ? 1 : 0; - struct nes_root_vpbl new_root = {0, 0, 0}; + struct nes_root_vpbl new_root = { 0, NULL, NULL }; u32 opcode = 0; u16 major_code; @@ -2112,13 +2114,12 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u32 driver_key = 0; u32 root_pbl_index = 0; u32 cur_pbl_index = 0; - int err = 0, pbl_depth = 0; + int err = 0; int ret = 0; u16 pbl_count = 0; u8 single_page = 1; u8 stag_key = 0; - pbl_depth = 0; region_length = 0; vpbl.pbl_vbase = NULL; root_vpbl.pbl_vbase = NULL; @@ -2931,7 +2932,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int ret; u16 original_last_aeq; u8 issue_modify_qp = 0; - u8 issue_disconnect = 0; u8 dont_wait = 0; nes_debug(NES_DBG_MOD_QP, "QP%u: QP State=%u, cur QP State=%u," @@ -3058,6 +3058,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hte_added = 0; } if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) && + (nesdev->iw_status) && (nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) { next_iwarp_state |= NES_CQP_QP_RESET; } else { @@ -3082,7 +3083,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", nesqp->iwarp_state); - issue_disconnect = 1; } else { nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", @@ -3485,13 +3485,13 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++) dst_page_list[i] = cpu_to_le64(src_page_list[i]); - nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %p, " - "length: %d, rkey: %0x, pgl_paddr: %p, " + nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, " + "length: %d, rkey: %0x, pgl_paddr: %llx, " "page_list_len: %u, wqe_misc: %x\n", - (void *)ib_wr->wr.fast_reg.iova_start, + (unsigned long long) ib_wr->wr.fast_reg.iova_start, ib_wr->wr.fast_reg.length, ib_wr->wr.fast_reg.rkey, - (void *)pnesfrpl->nes_wqe_pbl.paddr, + (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr, ib_wr->wr.fast_reg.page_list_len, wqe_misc); break; @@ -3936,6 +3936,17 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) return nesibdev; } +void nes_port_ibevent(struct nes_vnic *nesvnic) +{ + struct nes_ib_device *nesibdev = nesvnic->nesibdev; + struct nes_device *nesdev = nesvnic->nesdev; + struct ib_event event; + event.device = &nesibdev->ibdev; + event.element.port_num = nesvnic->logical_port + 1; + event.event = nesdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + ib_dispatch_event(&event); +} + /** * nes_destroy_ofa_device diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 3593983df7ba..64c9e7d02d4a 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -45,6 +45,7 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/scatterlist.h> +#include <linux/slab.h> #include <linux/io.h> #include <linux/fs.h> #include <linux/completion.h> @@ -326,6 +327,9 @@ struct qib_verbs_txreq { #define QIB_DEFAULT_MTU 4096 +/* max number of IB ports supported per HCA */ +#define QIB_MAX_IB_PORTS 2 + /* * Possible IB config parameters for f_get/set_ib_table() */ @@ -1402,7 +1406,7 @@ extern struct mutex qib_mutex; */ #define qib_early_err(dev, fmt, ...) \ do { \ - dev_info(dev, KERN_ERR QIB_DRV_NAME ": " fmt, ##__VA_ARGS__); \ + dev_err(dev, fmt, ##__VA_ARGS__); \ } while (0) #define qib_dev_err(dd, fmt, ...) \ diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h index b3955ed8f794..145da4040883 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -279,7 +279,7 @@ struct qib_base_info { * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define QIB_USER_SWMINOR 10 +#define QIB_USER_SWMINOR 11 #define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR) @@ -302,6 +302,18 @@ struct qib_base_info { #define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION) /* + * If the unit is specified via open, HCA choice is fixed. If port is + * specified, it's also fixed. Otherwise we try to spread contexts + * across ports and HCAs, using different algorithims. WITHIN is + * the old default, prior to this mechanism. + */ +#define QIB_PORT_ALG_ACROSS 0 /* round robin contexts across HCAs, then + * ports; this is the default */ +#define QIB_PORT_ALG_WITHIN 1 /* use all contexts on an HCA (round robin + * active ports within), then next HCA */ +#define QIB_PORT_ALG_COUNT 2 /* number of algorithm choices */ + +/* * This structure is passed to qib_userinit() to tell the driver where * user code buffers are, sizes, etc. The offsets and sizes of the * fields must remain unchanged, for binary compatibility. It can @@ -319,7 +331,7 @@ struct qib_user_info { /* size of struct base_info to write to */ __u32 spu_base_info_size; - __u32 _spu_unused3; + __u32 spu_port_alg; /* which QIB_PORT_ALG_*; unused user minor < 11 */ /* * If two or more processes wish to share a context, each process diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 05dcf0d9a7d3..204c4dd9dce0 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -136,7 +136,8 @@ static const struct file_operations diag_file_ops = { .write = qib_diag_write, .read = qib_diag_read, .open = qib_diag_open, - .release = qib_diag_release + .release = qib_diag_release, + .llseek = default_llseek, }; static atomic_t diagpkt_count = ATOMIC_INIT(0); @@ -149,6 +150,7 @@ static ssize_t qib_diagpkt_write(struct file *fp, const char __user *data, static const struct file_operations diagpkt_file_ops = { .owner = THIS_MODULE, .write = qib_diagpkt_write, + .llseek = noop_llseek, }; int qib_diag_add(struct qib_devdata *dd) diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index f15ce076ac49..9cd193603fb1 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -335,7 +335,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) smp_rmb(); /* prevent speculative reads of dma'ed hdrq */ } - for (last = 0, i = 1; !last; i += !last) { + for (last = 0, i = 1; !last && i <= 64; i += !last) { hdr = dd->f_get_msgheader(dd, rhf_addr); eflags = qib_hdrget_err_flags(rhf_addr); etype = qib_hdrget_rcv_type(rhf_addr); diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index a142a9eb5226..79d9971aff1f 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -63,7 +63,8 @@ static const struct file_operations qib_file_ops = { .open = qib_open, .release = qib_close, .poll = qib_poll, - .mmap = qib_mmapf + .mmap = qib_mmapf, + .llseek = noop_llseek, }; /* @@ -1294,128 +1295,130 @@ bail: return ret; } -static inline int usable(struct qib_pportdata *ppd, int active_only) +static inline int usable(struct qib_pportdata *ppd) { struct qib_devdata *dd = ppd->dd; - u32 linkok = active_only ? QIBL_LINKACTIVE : - (QIBL_LINKINIT | QIBL_LINKARMED | QIBL_LINKACTIVE); return dd && (dd->flags & QIB_PRESENT) && dd->kregbase && ppd->lid && - (ppd->lflags & linkok); + (ppd->lflags & QIBL_LINKACTIVE); } -static int find_free_ctxt(int unit, struct file *fp, - const struct qib_user_info *uinfo) +/* + * Select a context on the given device, either using a requested port + * or the port based on the context number. + */ +static int choose_port_ctxt(struct file *fp, struct qib_devdata *dd, u32 port, + const struct qib_user_info *uinfo) { - struct qib_devdata *dd = qib_lookup(unit); struct qib_pportdata *ppd = NULL; - int ret; - u32 ctxt; + int ret, ctxt; - if (!dd || (uinfo->spu_port && uinfo->spu_port > dd->num_pports)) { - ret = -ENODEV; - goto bail; - } - - /* - * If users requests specific port, only try that one port, else - * select "best" port below, based on context. - */ - if (uinfo->spu_port) { - ppd = dd->pport + uinfo->spu_port - 1; - if (!usable(ppd, 0)) { + if (port) { + if (!usable(dd->pport + port - 1)) { ret = -ENETDOWN; - goto bail; - } + goto done; + } else + ppd = dd->pport + port - 1; } - - for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) { - if (dd->rcd[ctxt]) - continue; - /* - * The setting and clearing of user context rcd[x] protected - * by the qib_mutex - */ - if (!ppd) { - /* choose port based on ctxt, if up, else 1st up */ - ppd = dd->pport + (ctxt % dd->num_pports); - if (!usable(ppd, 0)) { - int i; - for (i = 0; i < dd->num_pports; i++) { - ppd = dd->pport + i; - if (usable(ppd, 0)) - break; - } - if (i == dd->num_pports) { - ret = -ENETDOWN; - goto bail; - } - } + for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts && dd->rcd[ctxt]; + ctxt++) + ; + if (ctxt == dd->cfgctxts) { + ret = -EBUSY; + goto done; + } + if (!ppd) { + u32 pidx = ctxt % dd->num_pports; + if (usable(dd->pport + pidx)) + ppd = dd->pport + pidx; + else { + for (pidx = 0; pidx < dd->num_pports && !ppd; + pidx++) + if (usable(dd->pport + pidx)) + ppd = dd->pport + pidx; } - ret = setup_ctxt(ppd, ctxt, fp, uinfo); - goto bail; } - ret = -EBUSY; + ret = ppd ? setup_ctxt(ppd, ctxt, fp, uinfo) : -ENETDOWN; +done: + return ret; +} + +static int find_free_ctxt(int unit, struct file *fp, + const struct qib_user_info *uinfo) +{ + struct qib_devdata *dd = qib_lookup(unit); + int ret; + + if (!dd || (uinfo->spu_port && uinfo->spu_port > dd->num_pports)) + ret = -ENODEV; + else + ret = choose_port_ctxt(fp, dd, uinfo->spu_port, uinfo); -bail: return ret; } -static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo) +static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo, + unsigned alg) { - struct qib_pportdata *ppd; - int ret = 0, devmax; - int npresent, nup; - int ndev; + struct qib_devdata *udd = NULL; + int ret = 0, devmax, npresent, nup, ndev, dusable = 0, i; u32 port = uinfo->spu_port, ctxt; devmax = qib_count_units(&npresent, &nup); + if (!npresent) { + ret = -ENXIO; + goto done; + } + if (nup == 0) { + ret = -ENETDOWN; + goto done; + } - for (ndev = 0; ndev < devmax; ndev++) { - struct qib_devdata *dd = qib_lookup(ndev); - - /* device portion of usable() */ - if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase)) - continue; - for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) { - if (dd->rcd[ctxt]) + if (alg == QIB_PORT_ALG_ACROSS) { + unsigned inuse = ~0U; + /* find device (with ACTIVE ports) with fewest ctxts in use */ + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + unsigned cused = 0, cfree = 0; + if (!dd) continue; - if (port) { - if (port > dd->num_pports) - continue; - ppd = dd->pport + port - 1; - if (!usable(ppd, 0)) - continue; - } else { - /* - * choose port based on ctxt, if up, else - * first port that's up for multi-port HCA - */ - ppd = dd->pport + (ctxt % dd->num_pports); - if (!usable(ppd, 0)) { - int j; - - ppd = NULL; - for (j = 0; j < dd->num_pports && - !ppd; j++) - if (usable(dd->pport + j, 0)) - ppd = dd->pport + j; - if (!ppd) - continue; /* to next unit */ - } + if (port && port <= dd->num_pports && + usable(dd->pport + port - 1)) + dusable = 1; + else + for (i = 0; i < dd->num_pports; i++) + if (usable(dd->pport + i)) + dusable++; + if (!dusable) + continue; + for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; + ctxt++) + if (dd->rcd[ctxt]) + cused++; + else + cfree++; + if (cfree && cused < inuse) { + udd = dd; + inuse = cused; } - ret = setup_ctxt(ppd, ctxt, fp, uinfo); + } + if (udd) { + ret = choose_port_ctxt(fp, udd, port, uinfo); goto done; } + } else { + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + if (dd) { + ret = choose_port_ctxt(fp, dd, port, uinfo); + if (!ret) + goto done; + if (ret == -EBUSY) + dusable++; + } + } } - - if (npresent) { - if (nup == 0) - ret = -ENETDOWN; - else - ret = -EBUSY; - } else - ret = -ENXIO; + ret = dusable ? -EBUSY : -ENETDOWN; done: return ret; @@ -1481,7 +1484,7 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) { int ret; int i_minor; - unsigned swmajor, swminor; + unsigned swmajor, swminor, alg = QIB_PORT_ALG_ACROSS; /* Check to be sure we haven't already initialized this file */ if (ctxt_fp(fp)) { @@ -1498,6 +1501,9 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) swminor = uinfo->spu_userversion & 0xffff; + if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT) + alg = uinfo->spu_port_alg; + mutex_lock(&qib_mutex); if (qib_compatible_subctxts(swmajor, swminor) && @@ -1514,7 +1520,7 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) if (i_minor) ret = find_free_ctxt(i_minor - 1, fp, uinfo); else - ret = get_a_ctxt(fp, uinfo); + ret = get_a_ctxt(fp, uinfo, alg); done_chk_sdma: if (!ret) { @@ -1717,7 +1723,7 @@ static int qib_close(struct inode *in, struct file *fp) mutex_lock(&qib_mutex); - fd = (struct qib_filedata *) fp->private_data; + fd = fp->private_data; fp->private_data = NULL; rcd = fd->rcd; if (!rcd) { @@ -1803,7 +1809,7 @@ static int qib_ctxt_info(struct file *fp, struct qib_ctxt_info __user *uinfo) struct qib_ctxtdata *rcd = ctxt_fp(fp); struct qib_filedata *fd; - fd = (struct qib_filedata *) fp->private_data; + fd = fp->private_data; info.num_active = qib_count_active_units(); info.unit = rcd->dd->unit; @@ -1862,7 +1868,7 @@ static int disarm_req_delay(struct qib_ctxtdata *rcd) { int ret = 0; - if (!usable(rcd->ppd, 1)) { + if (!usable(rcd->ppd)) { int i; /* * if link is down, or otherwise not usable, delay @@ -1881,7 +1887,7 @@ static int disarm_req_delay(struct qib_ctxtdata *rcd) set_bit(_QIB_EVENT_DISARM_BUFS_BIT, &rcd->user_event_mask[i]); } - for (i = 0; !usable(rcd->ppd, 1) && i < 300; i++) + for (i = 0; !usable(rcd->ppd) && i < 300; i++) msleep(100); ret = -ENETDOWN; } diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 844954bf417b..f99bddc01716 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -58,6 +58,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, goto bail; } + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; @@ -135,8 +136,8 @@ static ssize_t driver_names_read(struct file *file, char __user *buf, } static const struct file_operations driver_ops[] = { - { .read = driver_stats_read, }, - { .read = driver_names_read, }, + { .read = driver_stats_read, .llseek = generic_file_llseek, }, + { .read = driver_names_read, .llseek = generic_file_llseek, }, }; /* read the per-device counters */ @@ -164,8 +165,8 @@ static ssize_t dev_names_read(struct file *file, char __user *buf, } static const struct file_operations cntr_ops[] = { - { .read = dev_counters_read, }, - { .read = dev_names_read, }, + { .read = dev_counters_read, .llseek = generic_file_llseek, }, + { .read = dev_names_read, .llseek = generic_file_llseek, }, }; /* @@ -210,9 +211,9 @@ static ssize_t portcntrs_2_read(struct file *file, char __user *buf, } static const struct file_operations portcntr_ops[] = { - { .read = portnames_read, }, - { .read = portcntrs_1_read, }, - { .read = portcntrs_2_read, }, + { .read = portnames_read, .llseek = generic_file_llseek, }, + { .read = portcntrs_1_read, .llseek = generic_file_llseek, }, + { .read = portcntrs_2_read, .llseek = generic_file_llseek, }, }; /* @@ -261,8 +262,8 @@ static ssize_t qsfp_2_read(struct file *file, char __user *buf, } static const struct file_operations qsfp_ops[] = { - { .read = qsfp_1_read, }, - { .read = qsfp_2_read, }, + { .read = qsfp_1_read, .llseek = generic_file_llseek, }, + { .read = qsfp_2_read, .llseek = generic_file_llseek, }, }; static ssize_t flash_read(struct file *file, char __user *buf, @@ -367,6 +368,7 @@ bail: static const struct file_operations flash_ops = { .read = flash_read, .write = flash_write, + .llseek = default_llseek, }; static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) @@ -553,13 +555,13 @@ bail: return ret; } -static int qibfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *qibfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { - int ret = get_sb_single(fs_type, flags, data, - qibfs_fill_super, mnt); - if (ret >= 0) - qib_super = mnt->mnt_sb; + struct dentry *ret; + ret = mount_single(fs_type, flags, data, qibfs_fill_super); + if (!IS_ERR(ret)) + qib_super = ret->d_sb; return ret; } @@ -601,7 +603,7 @@ int qibfs_remove(struct qib_devdata *dd) static struct file_system_type qibfs_fs_type = { .owner = THIS_MODULE, .name = "ipathfs", - .get_sb = qibfs_get_sb, + .mount = qibfs_mount, .kill_sb = qibfs_kill_super, }; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 5eedf83e2c3b..584d443b5335 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -5864,7 +5864,7 @@ static void write_7322_initregs(struct qib_devdata *dd) * Doesn't clear any of the error bits that might be set. */ val = TIDFLOW_ERRBITS; /* these are W1C */ - for (i = 0; i < dd->ctxtcnt; i++) { + for (i = 0; i < dd->cfgctxts; i++) { int flow; for (flow = 0; flow < NUM_TIDFLOWS_CTXT; flow++) qib_write_ureg(dd, ur_rcvflowtable+flow, val, i); @@ -7271,6 +7271,8 @@ static int serdes_7322_init(struct qib_pportdata *ppd) ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */ data = qib_read_kreg_port(ppd, krp_serdesctrl); + /* Turn off IB latency mode */ + data &= ~SYM_MASK(IBSerdesCtrl_0, IB_LAT_MODE); qib_write_kreg_port(ppd, krp_serdesctrl, data | SYM_MASK(IBSerdesCtrl_0, RXLOSEN)); diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index a873dd596e81..f3b503936043 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -93,7 +93,7 @@ unsigned long *qib_cpulist; void qib_set_ctxtcnt(struct qib_devdata *dd) { if (!qib_cfgctxts) - dd->cfgctxts = dd->ctxtcnt; + dd->cfgctxts = dd->first_user_ctxt + num_online_cpus(); else if (qib_cfgctxts < dd->num_pports) dd->cfgctxts = dd->ctxtcnt; else if (qib_cfgctxts <= dd->ctxtcnt) @@ -1243,6 +1243,7 @@ static int __devinit qib_init_one(struct pci_dev *pdev, qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot " "work if CONFIG_PCI_MSI is not enabled\n", ent->device); + dd = ERR_PTR(-ENODEV); #endif break; diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 7fa6e5592630..48b6674cbc49 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -103,16 +103,20 @@ int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); } else ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) + if (ret) { qib_early_err(&pdev->dev, "Unable to set DMA consistent mask: %d\n", ret); + goto bail; + } pci_set_master(pdev); ret = pci_enable_pcie_error_reporting(pdev); - if (ret) + if (ret) { qib_early_err(&pdev->dev, "Unable to enable pcie error reporting: %d\n", ret); + ret = 0; + } goto done; bail: diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index e0f65e39076b..6c39851d2ded 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -450,7 +450,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) * * Flushes both send and receive work queues. * Returns true if last WQE event should be generated. - * The QP s_lock should be held and interrupts disabled. + * The QP r_lock and s_lock should be held and interrupts disabled. * If we are already in error state, just return. */ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 40c0a373719c..955fb7157793 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -868,7 +868,7 @@ done: /* * Back up requester to resend the last un-ACKed request. - * The QP s_lock should be held and interrupts disabled. + * The QP r_lock and s_lock should be held and interrupts disabled. */ static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait) { @@ -911,7 +911,8 @@ static void rc_timeout(unsigned long arg) struct qib_ibport *ibp; unsigned long flags; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock_irqsave(&qp->r_lock, flags); + spin_lock(&qp->s_lock); if (qp->s_flags & QIB_S_TIMER) { ibp = to_iport(qp->ibqp.device, qp->port_num); ibp->n_rc_timeouts++; @@ -920,7 +921,8 @@ static void rc_timeout(unsigned long arg) qib_restart_rc(qp, qp->s_last_psn + 1, 1); qib_schedule_send(qp); } - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock(&qp->s_lock); + spin_unlock_irqrestore(&qp->r_lock, flags); } /* @@ -1414,10 +1416,6 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, spin_lock_irqsave(&qp->s_lock, flags); - /* Double check we can process this now that we hold the s_lock. */ - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) - goto ack_done; - /* Ignore invalid responses. */ if (qib_cmp24(psn, qp->s_next_psn) >= 0) goto ack_done; @@ -1661,9 +1659,6 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, ibp->n_rc_dupreq++; spin_lock_irqsave(&qp->s_lock, flags); - /* Double check we can process this now that we hold the s_lock. */ - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) - goto unlock_done; for (i = qp->r_head_ack_queue; ; i = prev) { if (i == qp->s_tail_ack_queue) @@ -1878,9 +1873,6 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, psn = be32_to_cpu(ohdr->bth[2]); opcode >>= 24; - /* Prevent simultaneous processing after APM on different CPUs */ - spin_lock(&qp->r_lock); - /* * Process responses (ACKs) before anything else. Note that the * packet sequence number will be for something in the send work @@ -1891,14 +1883,14 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, opcode <= OP(ATOMIC_ACKNOWLEDGE)) { qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn, hdrsize, pmtu, rcd); - goto runlock; + return; } /* Compute 24 bits worth of difference. */ diff = qib_cmp24(psn, qp->r_psn); if (unlikely(diff)) { if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd)) - goto runlock; + return; goto send_ack; } @@ -2076,7 +2068,10 @@ send_last: goto nack_op_err; if (!ret) goto rnr_nak; - goto send_last_imm; + wc.ex.imm_data = ohdr->u.rc.imm_data; + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + goto send_last; case OP(RDMA_READ_REQUEST): { struct qib_ack_entry *e; @@ -2090,9 +2085,6 @@ send_last: if (next > QIB_MAX_RDMA_ATOMIC) next = 0; spin_lock_irqsave(&qp->s_lock, flags); - /* Double check we can process this while holding the s_lock. */ - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) - goto srunlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) goto nack_inv_unlck; @@ -2146,7 +2138,7 @@ send_last: qp->s_flags |= QIB_S_RESP_PENDING; qib_schedule_send(qp); - goto srunlock; + goto sunlock; } case OP(COMPARE_SWAP): @@ -2165,9 +2157,6 @@ send_last: if (next > QIB_MAX_RDMA_ATOMIC) next = 0; spin_lock_irqsave(&qp->s_lock, flags); - /* Double check we can process this while holding the s_lock. */ - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) - goto srunlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) goto nack_inv_unlck; @@ -2213,7 +2202,7 @@ send_last: qp->s_flags |= QIB_S_RESP_PENDING; qib_schedule_send(qp); - goto srunlock; + goto sunlock; } default: @@ -2227,7 +2216,7 @@ send_last: /* Send an ACK if requested or required. */ if (psn & (1 << 31)) goto send_ack; - goto runlock; + return; rnr_nak: qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; @@ -2238,7 +2227,7 @@ rnr_nak: atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } - goto runlock; + return; nack_op_err: qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); @@ -2250,7 +2239,7 @@ nack_op_err: atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } - goto runlock; + return; nack_inv_unlck: spin_unlock_irqrestore(&qp->s_lock, flags); @@ -2264,7 +2253,7 @@ nack_inv: atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } - goto runlock; + return; nack_acc_unlck: spin_unlock_irqrestore(&qp->s_lock, flags); @@ -2274,13 +2263,6 @@ nack_acc: qp->r_ack_psn = qp->r_psn; send_ack: qib_send_rc_ack(qp); -runlock: - spin_unlock(&qp->r_lock); - return; - -srunlock: - spin_unlock_irqrestore(&qp->s_lock, flags); - spin_unlock(&qp->r_lock); return; sunlock: diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index b8456881f7f6..cad44491320b 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -656,6 +656,7 @@ unmap: } qp = tx->qp; qib_put_txreq(tx); + spin_lock(&qp->r_lock); spin_lock(&qp->s_lock); if (qp->ibqp.qp_type == IB_QPT_RC) { /* XXX what about error sending RDMA read responses? */ @@ -664,6 +665,7 @@ unmap: } else if (qp->s_wqe) qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); spin_unlock(&qp->s_lock); + spin_unlock(&qp->r_lock); /* return zero to process the next send work request */ goto unlock; diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index dab4d9f4a2cc..d50a33fe8bbc 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -347,7 +347,7 @@ static struct kobj_type qib_sl2vl_ktype = { #define QIB_DIAGC_ATTR(N) \ static struct qib_diagc_attr qib_diagc_attr_##N = { \ - .attr = { .name = __stringify(N), .mode = 0444 }, \ + .attr = { .name = __stringify(N), .mode = 0664 }, \ .counter = offsetof(struct qib_ibport, n_##N) \ } @@ -403,8 +403,27 @@ static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr, return sprintf(buf, "%u\n", *(u32 *)((char *)qibp + dattr->counter)); } +static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) +{ + struct qib_diagc_attr *dattr = + container_of(attr, struct qib_diagc_attr, attr); + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, diagc_kobj); + struct qib_ibport *qibp = &ppd->ibport_data; + char *endp; + long val = simple_strtol(buf, &endp, 0); + + if (val < 0 || endp == buf) + return -EINVAL; + + *(u32 *)((char *) qibp + dattr->counter) = val; + return size; +} + static const struct sysfs_ops qib_diagc_ops = { .show = diagc_attr_show, + .store = diagc_attr_store, }; static struct kobj_type qib_diagc_ktype = { diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index af30232b6831..7f36454c225e 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -170,7 +170,7 @@ static int find_ctxt(struct qib_devdata *dd, unsigned bufn) void qib_disarm_piobufs_set(struct qib_devdata *dd, unsigned long *mask, unsigned cnt) { - struct qib_pportdata *ppd, *pppd[dd->num_pports]; + struct qib_pportdata *ppd, *pppd[QIB_MAX_IB_PORTS]; unsigned i; unsigned long flags; diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 6c7fe78cca64..32ccf3c824ca 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -272,9 +272,6 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, opcode >>= 24; memset(&wc, 0, sizeof wc); - /* Prevent simultaneous processing after APM on different CPUs */ - spin_lock(&qp->r_lock); - /* Compare the PSN verses the expected PSN. */ if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) { /* @@ -460,8 +457,10 @@ rdma_first: } if (opcode == OP(RDMA_WRITE_ONLY)) goto rdma_last; - else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) + else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) { + wc.ex.imm_data = ohdr->u.rc.imm_data; goto rdma_last_imm; + } /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ @@ -474,8 +473,8 @@ rdma_first: break; case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): -rdma_last_imm: wc.ex.imm_data = ohdr->u.imm_data; +rdma_last_imm: hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; @@ -534,7 +533,6 @@ rdma_last: } qp->r_psn++; qp->r_state = opcode; - spin_unlock(&qp->r_lock); return; rewind: @@ -542,12 +540,10 @@ rewind: qp->r_sge.num_sge = 0; drop: ibp->n_pkt_drops++; - spin_unlock(&qp->r_lock); return; op_err: qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); - spin_unlock(&qp->r_lock); return; sunlock: diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index c838cda73347..e1b3da2a1f85 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -535,13 +535,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, wc.byte_len = tlen + sizeof(struct ib_grh); /* - * We need to serialize getting a receive work queue entry and - * generating a completion for it against QPs sending to this QP - * locally. - */ - spin_lock(&qp->r_lock); - - /* * Get the next work request entry to find where to put the data. */ if (qp->r_flags & QIB_R_REUSE_SGE) @@ -552,19 +545,19 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, ret = qib_get_rwqe(qp, 0); if (ret < 0) { qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); - goto bail_unlock; + return; } if (!ret) { if (qp->ibqp.qp_num == 0) ibp->n_vl15_dropped++; - goto bail_unlock; + return; } } /* Silently drop packets which are too big. */ if (unlikely(wc.byte_len > qp->r_len)) { qp->r_flags |= QIB_R_REUSE_SGE; ibp->n_pkt_drops++; - goto bail_unlock; + return; } if (has_grh) { qib_copy_sge(&qp->r_sge, &hdr->u.l.grh, @@ -579,7 +572,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, qp->r_sge.sge = *qp->r_sge.sg_list++; } if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) - goto bail_unlock; + return; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; @@ -601,7 +594,5 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & cpu_to_be32(IB_BTH_SOLICITED)) != 0); -bail_unlock: - spin_unlock(&qp->r_lock); bail:; } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index cda8f4173d23..9fab40488850 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -550,10 +550,12 @@ static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, { struct qib_ibport *ibp = &rcd->ppd->ibport_data; + spin_lock(&qp->r_lock); + /* Check for valid receive state. */ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { ibp->n_pkt_drops++; - return; + goto unlock; } switch (qp->ibqp.qp_type) { @@ -577,6 +579,9 @@ static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, default: break; } + +unlock: + spin_unlock(&qp->r_lock); } /** diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 40e858492f90..1a1657c82edd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -147,6 +147,11 @@ static void ipoib_get_ethtool_stats(struct net_device *dev, data[index++] = priv->lro.lro_mgr.stats.no_desc; } +static int ipoib_set_flags(struct net_device *dev, u32 flags) +{ + return ethtool_op_set_flags(dev, flags, ETH_FLAG_LRO); +} + static const struct ethtool_ops ipoib_ethtool_ops = { .get_drvinfo = ipoib_get_drvinfo, .get_rx_csum = ipoib_get_rx_csum, @@ -154,7 +159,7 @@ static const struct ethtool_ops ipoib_ethtool_ops = { .get_coalesce = ipoib_get_coalesce, .set_coalesce = ipoib_set_coalesce, .get_flags = ethtool_op_get_flags, - .set_flags = ethtool_op_set_flags, + .set_flags = ipoib_set_flags, .get_strings = ipoib_get_strings, .get_sset_count = ipoib_get_sset_count, .get_ethtool_stats = ipoib_get_ethtool_stats, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index ec6b4fbe25e4..dfa71903d6e4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -223,6 +223,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; struct sk_buff *skb; u64 mapping[IPOIB_UD_RX_SG]; + union ib_gid *dgid; ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -271,6 +272,16 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) ipoib_ud_dma_unmap_rx(priv, mapping); ipoib_ud_skb_put_frags(priv, skb, wc->byte_len); + /* First byte of dgid signals multicast when 0xff */ + dgid = &((struct ib_grh *)skb->data)->dgid; + + if (!(wc->wc_flags & IB_WC_GRH) || dgid->raw[0] != 0xff) + skb->pkt_type = PACKET_HOST; + else if (memcmp(dgid, dev->broadcast + 4, sizeof(union ib_gid)) == 0) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; @@ -281,9 +292,6 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) dev->stats.rx_bytes += skb->len; skb->dev = dev; - /* XXX get correct PACKET_ type here */ - skb->pkt_type = PACKET_HOST; - if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b4b22576f12a..9ff7bc73ed95 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1240,6 +1240,7 @@ static struct net_device *ipoib_add_port(const char *format, goto alloc_mem_failed; SET_NETDEV_DEV(priv->dev, hca->dma_device); + priv->dev->dev_id = port - 1; if (!ib_query_port(hca, port, &attr)) priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); @@ -1362,6 +1363,8 @@ static void ipoib_add_one(struct ib_device *device) } for (p = s; p <= e; ++p) { + if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND) + continue; dev = ipoib_add_port("ib%d", device, p); if (!IS_ERR(dev)) { priv = netdev_priv(dev); @@ -1409,8 +1412,7 @@ static int __init ipoib_init_module(void) ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); - ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE, - IPOIB_MIN_QUEUE_SIZE)); + ipoib_sendq_size = max3(ipoib_sendq_size, 2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE); #ifdef CONFIG_INFINIBAND_IPOIB_CM ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig index b411c51842da..d00af71a2cfc 100644 --- a/drivers/infiniband/ulp/iser/Kconfig +++ b/drivers/infiniband/ulp/iser/Kconfig @@ -9,4 +9,4 @@ config INFINIBAND_ISER The iSER protocol is defined by IETF. See <http://www.ietf.org/rfc/rfc5046.txt> - and <http://www.infinibandta.org/members/spec/Annex_iSER.PDF> + and <http://members.infinibandta.org/kwspub/spec/Annex_iSER.PDF> diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 0b9ef0716588..95a08a8ca8aa 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -170,7 +170,7 @@ static void iser_create_send_desc(struct iser_conn *ib_conn, } -int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) +static int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) { int i, j; u64 dma_addr; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index ed3f9ebae882..cfc1d65c4577 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -291,7 +291,7 @@ static void srp_free_target_ib(struct srp_target_port *target) for (i = 0; i < SRP_RQ_SIZE; ++i) srp_free_iu(target->srp_host, target->rx_ring[i]); - for (i = 0; i < SRP_SQ_SIZE + 1; ++i) + for (i = 0; i < SRP_SQ_SIZE; ++i) srp_free_iu(target->srp_host, target->tx_ring[i]); } @@ -811,6 +811,107 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, return len; } +/* + * Must be called with target->scsi_host->host_lock held to protect + * req_lim and tx_head. Lock cannot be dropped between call here and + * call to __srp_post_send(). + * + * Note: + * An upper limit for the number of allocated information units for each + * request type is: + * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues + * more than Scsi_Host.can_queue requests. + * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE. + * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than + * one unanswered SRP request to an initiator. + */ +static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target, + enum srp_iu_type iu_type) +{ + s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE; + struct srp_iu *iu; + + srp_send_completion(target->send_cq, target); + + if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE) + return NULL; + + /* Initiator responses to target requests do not consume credits */ + if (target->req_lim <= rsv && iu_type != SRP_IU_RSP) { + ++target->zero_req_lim; + return NULL; + } + + iu = target->tx_ring[target->tx_head & SRP_SQ_MASK]; + iu->type = iu_type; + return iu; +} + +/* + * Must be called with target->scsi_host->host_lock held to protect + * req_lim and tx_head. + */ +static int __srp_post_send(struct srp_target_port *target, + struct srp_iu *iu, int len) +{ + struct ib_sge list; + struct ib_send_wr wr, *bad_wr; + int ret = 0; + + list.addr = iu->dma; + list.length = len; + list.lkey = target->srp_host->srp_dev->mr->lkey; + + wr.next = NULL; + wr.wr_id = target->tx_head & SRP_SQ_MASK; + wr.sg_list = &list; + wr.num_sge = 1; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + + ret = ib_post_send(target->qp, &wr, &bad_wr); + + if (!ret) { + ++target->tx_head; + if (iu->type != SRP_IU_RSP) + --target->req_lim; + } + + return ret; +} + +static int srp_post_recv(struct srp_target_port *target) +{ + unsigned long flags; + struct srp_iu *iu; + struct ib_sge list; + struct ib_recv_wr wr, *bad_wr; + unsigned int next; + int ret; + + spin_lock_irqsave(target->scsi_host->host_lock, flags); + + next = target->rx_head & SRP_RQ_MASK; + wr.wr_id = next; + iu = target->rx_ring[next]; + + list.addr = iu->dma; + list.length = iu->size; + list.lkey = target->srp_host->srp_dev->mr->lkey; + + wr.next = NULL; + wr.sg_list = &list; + wr.num_sge = 1; + + ret = ib_post_recv(target->qp, &wr, &bad_wr); + if (!ret) + ++target->rx_head; + + spin_unlock_irqrestore(target->scsi_host->host_lock, flags); + + return ret; +} + static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) { struct srp_request *req; @@ -864,10 +965,76 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) spin_unlock_irqrestore(target->scsi_host->host_lock, flags); } +static int srp_response_common(struct srp_target_port *target, s32 req_delta, + void *rsp, int len) +{ + struct ib_device *dev; + unsigned long flags; + struct srp_iu *iu; + int err = 1; + + dev = target->srp_host->srp_dev->dev; + + spin_lock_irqsave(target->scsi_host->host_lock, flags); + target->req_lim += req_delta; + + iu = __srp_get_tx_iu(target, SRP_IU_RSP); + if (!iu) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "no IU available to send response\n"); + goto out; + } + + ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE); + memcpy(iu->buf, rsp, len); + ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); + + err = __srp_post_send(target, iu, len); + if (err) + shost_printk(KERN_ERR, target->scsi_host, PFX + "unable to post response: %d\n", err); + +out: + spin_unlock_irqrestore(target->scsi_host->host_lock, flags); + return err; +} + +static void srp_process_cred_req(struct srp_target_port *target, + struct srp_cred_req *req) +{ + struct srp_cred_rsp rsp = { + .opcode = SRP_CRED_RSP, + .tag = req->tag, + }; + s32 delta = be32_to_cpu(req->req_lim_delta); + + if (srp_response_common(target, delta, &rsp, sizeof rsp)) + shost_printk(KERN_ERR, target->scsi_host, PFX + "problems processing SRP_CRED_REQ\n"); +} + +static void srp_process_aer_req(struct srp_target_port *target, + struct srp_aer_req *req) +{ + struct srp_aer_rsp rsp = { + .opcode = SRP_AER_RSP, + .tag = req->tag, + }; + s32 delta = be32_to_cpu(req->req_lim_delta); + + shost_printk(KERN_ERR, target->scsi_host, PFX + "ignoring AER for LUN %llu\n", be64_to_cpu(req->lun)); + + if (srp_response_common(target, delta, &rsp, sizeof rsp)) + shost_printk(KERN_ERR, target->scsi_host, PFX + "problems processing SRP_AER_REQ\n"); +} + static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) { struct ib_device *dev; struct srp_iu *iu; + int res; u8 opcode; iu = target->rx_ring[wc->wr_id]; @@ -879,21 +1046,10 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) opcode = *(u8 *) iu->buf; if (0) { - int i; - shost_printk(KERN_ERR, target->scsi_host, PFX "recv completion, opcode 0x%02x\n", opcode); - - for (i = 0; i < wc->byte_len; ++i) { - if (i % 8 == 0) - printk(KERN_ERR " [%02x] ", i); - printk(" %02x", ((u8 *) iu->buf)[i]); - if ((i + 1) % 8 == 0) - printk("\n"); - } - - if (wc->byte_len % 8) - printk("\n"); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1, + iu->buf, wc->byte_len, true); } switch (opcode) { @@ -901,6 +1057,14 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) srp_process_rsp(target, iu->buf); break; + case SRP_CRED_REQ: + srp_process_cred_req(target, iu->buf); + break; + + case SRP_AER_REQ: + srp_process_aer_req(target, iu->buf); + break; + case SRP_T_LOGOUT: /* XXX Handle target logout */ shost_printk(KERN_WARNING, target->scsi_host, @@ -915,6 +1079,11 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) ib_dma_sync_single_for_device(dev, iu->dma, target->max_ti_iu_len, DMA_FROM_DEVICE); + + res = srp_post_recv(target); + if (res != 0) + shost_printk(KERN_ERR, target->scsi_host, + PFX "Recv failed with error code %d\n", res); } static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) @@ -954,100 +1123,6 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr) } } -static int __srp_post_recv(struct srp_target_port *target) -{ - struct srp_iu *iu; - struct ib_sge list; - struct ib_recv_wr wr, *bad_wr; - unsigned int next; - int ret; - - next = target->rx_head & (SRP_RQ_SIZE - 1); - wr.wr_id = next; - iu = target->rx_ring[next]; - - list.addr = iu->dma; - list.length = iu->size; - list.lkey = target->srp_host->srp_dev->mr->lkey; - - wr.next = NULL; - wr.sg_list = &list; - wr.num_sge = 1; - - ret = ib_post_recv(target->qp, &wr, &bad_wr); - if (!ret) - ++target->rx_head; - - return ret; -} - -static int srp_post_recv(struct srp_target_port *target) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(target->scsi_host->host_lock, flags); - ret = __srp_post_recv(target); - spin_unlock_irqrestore(target->scsi_host->host_lock, flags); - - return ret; -} - -/* - * Must be called with target->scsi_host->host_lock held to protect - * req_lim and tx_head. Lock cannot be dropped between call here and - * call to __srp_post_send(). - */ -static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target, - enum srp_request_type req_type) -{ - s32 min = (req_type == SRP_REQ_TASK_MGMT) ? 1 : 2; - - srp_send_completion(target->send_cq, target); - - if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE) - return NULL; - - if (target->req_lim < min) { - ++target->zero_req_lim; - return NULL; - } - - return target->tx_ring[target->tx_head & SRP_SQ_SIZE]; -} - -/* - * Must be called with target->scsi_host->host_lock held to protect - * req_lim and tx_head. - */ -static int __srp_post_send(struct srp_target_port *target, - struct srp_iu *iu, int len) -{ - struct ib_sge list; - struct ib_send_wr wr, *bad_wr; - int ret = 0; - - list.addr = iu->dma; - list.length = len; - list.lkey = target->srp_host->srp_dev->mr->lkey; - - wr.next = NULL; - wr.wr_id = target->tx_head & SRP_SQ_SIZE; - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IB_WR_SEND; - wr.send_flags = IB_SEND_SIGNALED; - - ret = ib_post_send(target->qp, &wr, &bad_wr); - - if (!ret) { - ++target->tx_head; - --target->req_lim; - } - - return ret; -} - static int srp_queuecommand(struct scsi_cmnd *scmnd, void (*done)(struct scsi_cmnd *)) { @@ -1068,7 +1143,7 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, return 0; } - iu = __srp_get_tx_iu(target, SRP_REQ_NORMAL); + iu = __srp_get_tx_iu(target, SRP_IU_CMD); if (!iu) goto err; @@ -1076,7 +1151,7 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, ib_dma_sync_single_for_cpu(dev, iu->dma, srp_max_iu_len, DMA_TO_DEVICE); - req = list_entry(target->free_reqs.next, struct srp_request, list); + req = list_first_entry(&target->free_reqs, struct srp_request, list); scmnd->scsi_done = done; scmnd->result = 0; @@ -1102,11 +1177,6 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, goto err; } - if (__srp_post_recv(target)) { - shost_printk(KERN_ERR, target->scsi_host, PFX "Recv failed\n"); - goto err_unmap; - } - ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len, DMA_TO_DEVICE); @@ -1138,7 +1208,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target) goto err; } - for (i = 0; i < SRP_SQ_SIZE + 1; ++i) { + for (i = 0; i < SRP_SQ_SIZE; ++i) { target->tx_ring[i] = srp_alloc_iu(target->srp_host, srp_max_iu_len, GFP_KERNEL, DMA_TO_DEVICE); @@ -1154,7 +1224,7 @@ err: target->rx_ring[i] = NULL; } - for (i = 0; i < SRP_SQ_SIZE + 1; ++i) { + for (i = 0; i < SRP_SQ_SIZE; ++i) { srp_free_iu(target->srp_host, target->tx_ring[i]); target->tx_ring[i] = NULL; } @@ -1249,6 +1319,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) int attr_mask = 0; int comp = 0; int opcode = 0; + int i; switch (event->event) { case IB_CM_REQ_ERROR: @@ -1268,8 +1339,13 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) target->max_ti_iu_len = be32_to_cpu(rsp->max_ti_iu_len); target->req_lim = be32_to_cpu(rsp->req_lim_delta); - target->scsi_host->can_queue = min(target->req_lim, - target->scsi_host->can_queue); + /* + * Reserve credits for task management so we don't + * bounce requests back to the SCSI mid-layer. + */ + target->scsi_host->can_queue + = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE, + target->scsi_host->can_queue); } else { shost_printk(KERN_WARNING, target->scsi_host, PFX "Unhandled RSP opcode %#x\n", opcode); @@ -1298,7 +1374,11 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) if (target->status) break; - target->status = srp_post_recv(target); + for (i = 0; i < SRP_RQ_SIZE; i++) { + target->status = srp_post_recv(target); + if (target->status) + break; + } if (target->status) break; @@ -1362,6 +1442,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) static int srp_send_tsk_mgmt(struct srp_target_port *target, struct srp_request *req, u8 func) { + struct ib_device *dev = target->srp_host->srp_dev->dev; struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; @@ -1375,10 +1456,12 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target, init_completion(&req->done); - iu = __srp_get_tx_iu(target, SRP_REQ_TASK_MGMT); + iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT); if (!iu) goto out; + ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, + DMA_TO_DEVICE); tsk_mgmt = iu->buf; memset(tsk_mgmt, 0, sizeof *tsk_mgmt); @@ -1388,6 +1471,8 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target, tsk_mgmt->tsk_mgmt_func = func; tsk_mgmt->task_tag = req->index; + ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, + DMA_TO_DEVICE); if (__srp_post_send(target, iu, sizeof *tsk_mgmt)) goto out; @@ -1564,6 +1649,18 @@ static ssize_t show_orig_dgid(struct device *dev, return sprintf(buf, "%pI6\n", target->orig_dgid); } +static ssize_t show_req_lim(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + if (target->state == SRP_TARGET_DEAD || + target->state == SRP_TARGET_REMOVED) + return -ENODEV; + + return sprintf(buf, "%d\n", target->req_lim); +} + static ssize_t show_zero_req_lim(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1598,6 +1695,7 @@ static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); +static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); @@ -1609,6 +1707,7 @@ static struct device_attribute *srp_host_attrs[] = { &dev_attr_pkey, &dev_attr_dgid, &dev_attr_orig_dgid, + &dev_attr_req_lim, &dev_attr_zero_req_lim, &dev_attr_local_ib_port, &dev_attr_local_ib_device, @@ -1624,9 +1723,9 @@ static struct scsi_host_template srp_template = { .eh_abort_handler = srp_abort, .eh_device_reset_handler = srp_reset_device, .eh_host_reset_handler = srp_reset_host, - .can_queue = SRP_SQ_SIZE, + .can_queue = SRP_CMD_SQ_SIZE, .this_id = -1, - .cmd_per_lun = SRP_SQ_SIZE, + .cmd_per_lun = SRP_CMD_SQ_SIZE, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = srp_host_attrs }; @@ -1811,7 +1910,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) printk(KERN_WARNING PFX "bad max cmd_per_lun parameter '%s'\n", p); goto out; } - target->scsi_host->cmd_per_lun = min(token, SRP_SQ_SIZE); + target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE); break; case SRP_OPT_IO_CLASS: @@ -1889,7 +1988,7 @@ static ssize_t srp_create_target(struct device *dev, INIT_LIST_HEAD(&target->free_reqs); INIT_LIST_HEAD(&target->req_queue); - for (i = 0; i < SRP_SQ_SIZE; ++i) { + for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { target->req_ring[i].index = i; list_add_tail(&target->req_ring[i].list, &target->free_reqs); } @@ -2157,6 +2256,9 @@ static int __init srp_init_module(void) { int ret; + BUILD_BUG_ON_NOT_POWER_OF_2(SRP_SQ_SIZE); + BUILD_BUG_ON_NOT_POWER_OF_2(SRP_RQ_SIZE); + if (srp_sg_tablesize > 255) { printk(KERN_WARNING PFX "Clamping srp_sg_tablesize to 255\n"); srp_sg_tablesize = 255; diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 5a80eac6fdaa..ed0dce9e479f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -59,7 +59,14 @@ enum { SRP_RQ_SHIFT = 6, SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT, - SRP_SQ_SIZE = SRP_RQ_SIZE - 1, + SRP_RQ_MASK = SRP_RQ_SIZE - 1, + + SRP_SQ_SIZE = SRP_RQ_SIZE, + SRP_SQ_MASK = SRP_SQ_SIZE - 1, + SRP_RSP_SQ_SIZE = 1, + SRP_REQ_SQ_SIZE = SRP_SQ_SIZE - SRP_RSP_SQ_SIZE, + SRP_TSK_MGMT_SQ_SIZE = 1, + SRP_CMD_SQ_SIZE = SRP_REQ_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE, SRP_TAG_TSK_MGMT = 1 << (SRP_RQ_SHIFT + 1), @@ -75,9 +82,10 @@ enum srp_target_state { SRP_TARGET_REMOVED }; -enum srp_request_type { - SRP_REQ_NORMAL, - SRP_REQ_TASK_MGMT, +enum srp_iu_type { + SRP_IU_CMD, + SRP_IU_TSK_MGMT, + SRP_IU_RSP, }; struct srp_device { @@ -144,11 +152,11 @@ struct srp_target_port { unsigned tx_head; unsigned tx_tail; - struct srp_iu *tx_ring[SRP_SQ_SIZE + 1]; + struct srp_iu *tx_ring[SRP_SQ_SIZE]; struct list_head free_reqs; struct list_head req_queue; - struct srp_request req_ring[SRP_SQ_SIZE]; + struct srp_request req_ring[SRP_CMD_SQ_SIZE]; struct work_struct work; @@ -164,6 +172,7 @@ struct srp_iu { void *buf; size_t size; enum dma_data_direction direction; + enum srp_iu_type type; }; #endif /* IB_SRP_H */ |