diff options
Diffstat (limited to 'drivers/infiniband')
95 files changed, 20092 insertions, 1447 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index a193dfbf99d2..a5dc78ae62d4 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -44,8 +44,8 @@ source "drivers/infiniband/hw/ipath/Kconfig" source "drivers/infiniband/hw/ehca/Kconfig" source "drivers/infiniband/hw/amso1100/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig" - source "drivers/infiniband/hw/mlx4/Kconfig" +source "drivers/infiniband/hw/nes/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index 75f325e40b54..ed35e4496241 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/ obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/ obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/ +obj-$(CONFIG_INFINIBAND_NES) += hw/nes/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 5381c80de10a..a58ad8a470f9 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -110,7 +110,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; int ret; - dev = ip_dev_find(ip); + dev = ip_dev_find(&init_net, ip); if (!dev) return -EADDRNOTAVAIL; @@ -158,7 +158,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in) memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; - if (ip_route_output_key(&rt, &fl)) + if (ip_route_output_key(&init_net, &rt, &fl)) return; neigh_event_send(rt->u.dst.neighbour, NULL); @@ -179,7 +179,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; fl.nl_u.ip4_u.saddr = src_ip; - ret = ip_route_output_key(&rt, &fl); + ret = ip_route_output_key(&init_net, &rt, &fl); if (ret) goto out; @@ -261,15 +261,15 @@ static int addr_resolve_local(struct sockaddr_in *src_in, __be32 dst_ip = dst_in->sin_addr.s_addr; int ret; - dev = ip_dev_find(dst_ip); + dev = ip_dev_find(&init_net, dst_ip); if (!dev) return -EADDRNOTAVAIL; - if (ZERONET(src_ip)) { + if (ipv4_is_zeronet(src_ip)) { src_in->sin_family = dst_in->sin_family; src_in->sin_addr.s_addr = dst_ip; ret = rdma_copy_addr(addr, dev, dev->dev_addr); - } else if (LOOPBACK(src_ip)) { + } else if (ipv4_is_loopback(src_ip)) { ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); if (!ret) memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2e39236d189f..638b727d42e0 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 Intel Corporation. All rights reserved. + * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -37,12 +37,14 @@ #include <linux/completion.h> #include <linux/dma-mapping.h> +#include <linux/device.h> #include <linux/err.h> #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/random.h> #include <linux/rbtree.h> #include <linux/spinlock.h> +#include <linux/sysfs.h> #include <linux/workqueue.h> #include <rdma/ib_cache.h> @@ -78,17 +80,94 @@ static struct ib_cm { struct workqueue_struct *wq; } cm; +/* Counter indexes ordered by attribute ID */ +enum { + CM_REQ_COUNTER, + CM_MRA_COUNTER, + CM_REJ_COUNTER, + CM_REP_COUNTER, + CM_RTU_COUNTER, + CM_DREQ_COUNTER, + CM_DREP_COUNTER, + CM_SIDR_REQ_COUNTER, + CM_SIDR_REP_COUNTER, + CM_LAP_COUNTER, + CM_APR_COUNTER, + CM_ATTR_COUNT, + CM_ATTR_ID_OFFSET = 0x0010, +}; + +enum { + CM_XMIT, + CM_XMIT_RETRIES, + CM_RECV, + CM_RECV_DUPLICATES, + CM_COUNTER_GROUPS +}; + +static char const counter_group_names[CM_COUNTER_GROUPS] + [sizeof("cm_rx_duplicates")] = { + "cm_tx_msgs", "cm_tx_retries", + "cm_rx_msgs", "cm_rx_duplicates" +}; + +struct cm_counter_group { + struct kobject obj; + atomic_long_t counter[CM_ATTR_COUNT]; +}; + +struct cm_counter_attribute { + struct attribute attr; + int index; +}; + +#define CM_COUNTER_ATTR(_name, _index) \ +struct cm_counter_attribute cm_##_name##_counter_attr = { \ + .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \ + .index = _index \ +} + +static CM_COUNTER_ATTR(req, CM_REQ_COUNTER); +static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER); +static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER); +static CM_COUNTER_ATTR(rep, CM_REP_COUNTER); +static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER); +static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER); +static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER); +static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER); +static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER); +static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER); +static CM_COUNTER_ATTR(apr, CM_APR_COUNTER); + +static struct attribute *cm_counter_default_attrs[] = { + &cm_req_counter_attr.attr, + &cm_mra_counter_attr.attr, + &cm_rej_counter_attr.attr, + &cm_rep_counter_attr.attr, + &cm_rtu_counter_attr.attr, + &cm_dreq_counter_attr.attr, + &cm_drep_counter_attr.attr, + &cm_sidr_req_counter_attr.attr, + &cm_sidr_rep_counter_attr.attr, + &cm_lap_counter_attr.attr, + &cm_apr_counter_attr.attr, + NULL +}; + struct cm_port { struct cm_device *cm_dev; struct ib_mad_agent *mad_agent; + struct kobject port_obj; u8 port_num; + struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; }; struct cm_device { struct list_head list; struct ib_device *device; + struct kobject dev_obj; u8 ack_delay; - struct cm_port port[0]; + struct cm_port *port[0]; }; struct cm_av { @@ -278,7 +357,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) list_for_each_entry(cm_dev, &cm.device_list, list) { if (!ib_find_cached_gid(cm_dev->device, &path->sgid, &p, NULL)) { - port = &cm_dev->port[p-1]; + port = cm_dev->port[p-1]; break; } } @@ -895,6 +974,9 @@ static void cm_format_req(struct cm_req_msg *req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_req_param *param) { + struct ib_sa_path_rec *pri_path = param->primary_path; + struct ib_sa_path_rec *alt_path = param->alternate_path; + cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); @@ -918,35 +1000,46 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); cm_req_set_srq(req_msg, param->srq); - req_msg->primary_local_lid = param->primary_path->slid; - req_msg->primary_remote_lid = param->primary_path->dlid; - req_msg->primary_local_gid = param->primary_path->sgid; - req_msg->primary_remote_gid = param->primary_path->dgid; - cm_req_set_primary_flow_label(req_msg, param->primary_path->flow_label); - cm_req_set_primary_packet_rate(req_msg, param->primary_path->rate); - req_msg->primary_traffic_class = param->primary_path->traffic_class; - req_msg->primary_hop_limit = param->primary_path->hop_limit; - cm_req_set_primary_sl(req_msg, param->primary_path->sl); - cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */ + if (pri_path->hop_limit <= 1) { + req_msg->primary_local_lid = pri_path->slid; + req_msg->primary_remote_lid = pri_path->dlid; + } else { + /* Work-around until there's a way to obtain remote LID info */ + req_msg->primary_local_lid = IB_LID_PERMISSIVE; + req_msg->primary_remote_lid = IB_LID_PERMISSIVE; + } + req_msg->primary_local_gid = pri_path->sgid; + req_msg->primary_remote_gid = pri_path->dgid; + cm_req_set_primary_flow_label(req_msg, pri_path->flow_label); + cm_req_set_primary_packet_rate(req_msg, pri_path->rate); + req_msg->primary_traffic_class = pri_path->traffic_class; + req_msg->primary_hop_limit = pri_path->hop_limit; + cm_req_set_primary_sl(req_msg, pri_path->sl); + cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1)); cm_req_set_primary_local_ack_timeout(req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, - param->primary_path->packet_life_time)); + pri_path->packet_life_time)); - if (param->alternate_path) { - req_msg->alt_local_lid = param->alternate_path->slid; - req_msg->alt_remote_lid = param->alternate_path->dlid; - req_msg->alt_local_gid = param->alternate_path->sgid; - req_msg->alt_remote_gid = param->alternate_path->dgid; + if (alt_path) { + if (alt_path->hop_limit <= 1) { + req_msg->alt_local_lid = alt_path->slid; + req_msg->alt_remote_lid = alt_path->dlid; + } else { + req_msg->alt_local_lid = IB_LID_PERMISSIVE; + req_msg->alt_remote_lid = IB_LID_PERMISSIVE; + } + req_msg->alt_local_gid = alt_path->sgid; + req_msg->alt_remote_gid = alt_path->dgid; cm_req_set_alt_flow_label(req_msg, - param->alternate_path->flow_label); - cm_req_set_alt_packet_rate(req_msg, param->alternate_path->rate); - req_msg->alt_traffic_class = param->alternate_path->traffic_class; - req_msg->alt_hop_limit = param->alternate_path->hop_limit; - cm_req_set_alt_sl(req_msg, param->alternate_path->sl); - cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */ + alt_path->flow_label); + cm_req_set_alt_packet_rate(req_msg, alt_path->rate); + req_msg->alt_traffic_class = alt_path->traffic_class; + req_msg->alt_hop_limit = alt_path->hop_limit; + cm_req_set_alt_sl(req_msg, alt_path->sl); + cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1)); cm_req_set_alt_local_ack_timeout(req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, - param->alternate_path->packet_life_time)); + alt_path->packet_life_time)); } if (param->private_data && param->private_data_len) @@ -1270,6 +1363,9 @@ static void cm_dup_req_handler(struct cm_work *work, struct ib_mad_send_buf *msg = NULL; int ret; + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_REQ_COUNTER]); + /* Quick state check to discard duplicate REQs. */ if (cm_id_priv->id.state == IB_CM_REQ_RCVD) return; @@ -1359,6 +1455,34 @@ out: return listen_cm_id_priv; } +/* + * Work-around for inter-subnet connections. If the LIDs are permissive, + * we need to override the LID/SL data in the REQ with the LID information + * in the work completion. + */ +static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) +{ + if (!cm_req_get_primary_subnet_local(req_msg)) { + if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { + req_msg->primary_local_lid = cpu_to_be16(wc->slid); + cm_req_set_primary_sl(req_msg, wc->sl); + } + + if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE) + req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits); + } + + if (!cm_req_get_alt_subnet_local(req_msg)) { + if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { + req_msg->alt_local_lid = cpu_to_be16(wc->slid); + cm_req_set_alt_sl(req_msg, wc->sl); + } + + if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE) + req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits); + } +} + static int cm_req_handler(struct cm_work *work) { struct ib_cm_id *cm_id; @@ -1399,6 +1523,7 @@ static int cm_req_handler(struct cm_work *work) cm_id_priv->id.service_id = req_msg->service_id; cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL); + cm_process_routed_req(req_msg, work->mad_recv_wc->wc); cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); if (ret) { @@ -1616,6 +1741,8 @@ static void cm_dup_rep_handler(struct cm_work *work) if (!cm_id_priv) return; + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_REP_COUNTER]); ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) goto deref; @@ -1781,6 +1908,8 @@ static int cm_rtu_handler(struct cm_work *work) if (cm_id_priv->id.state != IB_CM_REP_SENT && cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { spin_unlock_irq(&cm_id_priv->lock); + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_RTU_COUNTER]); goto out; } cm_id_priv->id.state = IB_CM_ESTABLISHED; @@ -1958,6 +2087,8 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, dreq_msg->local_comm_id); if (!cm_id_priv) { + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); return -EINVAL; } @@ -1977,6 +2108,8 @@ static int cm_dreq_handler(struct cm_work *work) case IB_CM_MRA_REP_RCVD: break; case IB_CM_TIMEWAIT: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_DREQ_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; @@ -1988,6 +2121,10 @@ static int cm_dreq_handler(struct cm_work *work) if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; + case IB_CM_DREQ_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_DREQ_COUNTER]); + goto unlock; default: goto unlock; } @@ -2339,10 +2476,20 @@ static int cm_mra_handler(struct cm_work *work) if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) { + if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) + atomic_long_inc(&work->port-> + counter_group[CM_RECV_DUPLICATES]. + counter[CM_MRA_COUNTER]); goto out; + } cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; break; + case IB_CM_MRA_REQ_RCVD: + case IB_CM_MRA_REP_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_MRA_COUNTER]); + /* fall through */ default: goto out; } @@ -2502,6 +2649,8 @@ static int cm_lap_handler(struct cm_work *work) case IB_CM_LAP_IDLE: break; case IB_CM_MRA_LAP_SENT: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_LAP_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; @@ -2515,6 +2664,10 @@ static int cm_lap_handler(struct cm_work *work) if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; + case IB_CM_LAP_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_LAP_COUNTER]); + goto unlock; default: goto unlock; } @@ -2796,6 +2949,8 @@ static int cm_sidr_req_handler(struct cm_work *work) cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); if (cur_cm_id_priv) { spin_unlock_irq(&cm.lock); + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_SIDR_REQ_COUNTER]); goto out; /* Duplicate message. */ } cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; @@ -2990,6 +3145,27 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_send_buf *msg = mad_send_wc->send_buf; + struct cm_port *port; + u16 attr_index; + + port = mad_agent->context; + attr_index = be16_to_cpu(((struct ib_mad_hdr *) + msg->mad)->attr_id) - CM_ATTR_ID_OFFSET; + + /* + * If the send was in response to a received message (context[0] is not + * set to a cm_id), and is not a REJ, then it is a send that was + * manually retried. + */ + if (!msg->context[0] && (attr_index != CM_REJ_COUNTER)) + msg->retries = 1; + + atomic_long_add(1 + msg->retries, + &port->counter_group[CM_XMIT].counter[attr_index]); + if (msg->retries) + atomic_long_add(msg->retries, + &port->counter_group[CM_XMIT_RETRIES]. + counter[attr_index]); switch (mad_send_wc->status) { case IB_WC_SUCCESS: @@ -3148,8 +3324,10 @@ EXPORT_SYMBOL(ib_cm_notify); static void cm_recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) { + struct cm_port *port = mad_agent->context; struct cm_work *work; enum ib_cm_event_type event; + u16 attr_id; int paths = 0; switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { @@ -3194,6 +3372,10 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, return; } + attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id); + atomic_long_inc(&port->counter_group[CM_RECV]. + counter[attr_id - CM_ATTR_ID_OFFSET]); + work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, GFP_KERNEL); if (!work) { @@ -3204,7 +3386,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, INIT_DELAYED_WORK(&work->work, cm_work_handler); work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; - work->port = (struct cm_port *)mad_agent->context; + work->port = port; queue_delayed_work(cm.wq, &work->work, 0); } @@ -3379,6 +3561,108 @@ static void cm_get_ack_delay(struct cm_device *cm_dev) cm_dev->ack_delay = attr.local_ca_ack_delay; } +static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, + char *buf) +{ + struct cm_counter_group *group; + struct cm_counter_attribute *cm_attr; + + group = container_of(obj, struct cm_counter_group, obj); + cm_attr = container_of(attr, struct cm_counter_attribute, attr); + + return sprintf(buf, "%ld\n", + atomic_long_read(&group->counter[cm_attr->index])); +} + +static struct sysfs_ops cm_counter_ops = { + .show = cm_show_counter +}; + +static struct kobj_type cm_counter_obj_type = { + .sysfs_ops = &cm_counter_ops, + .default_attrs = cm_counter_default_attrs +}; + +static void cm_release_port_obj(struct kobject *obj) +{ + struct cm_port *cm_port; + + printk(KERN_ERR "free cm port\n"); + + cm_port = container_of(obj, struct cm_port, port_obj); + kfree(cm_port); +} + +static struct kobj_type cm_port_obj_type = { + .release = cm_release_port_obj +}; + +static void cm_release_dev_obj(struct kobject *obj) +{ + struct cm_device *cm_dev; + + printk(KERN_ERR "free cm dev\n"); + + cm_dev = container_of(obj, struct cm_device, dev_obj); + kfree(cm_dev); +} + +static struct kobj_type cm_dev_obj_type = { + .release = cm_release_dev_obj +}; + +struct class cm_class = { + .name = "infiniband_cm", +}; +EXPORT_SYMBOL(cm_class); + +static void cm_remove_fs_obj(struct kobject *obj) +{ + kobject_put(obj->parent); + kobject_put(obj); +} + +static int cm_create_port_fs(struct cm_port *port) +{ + int i, ret; + + ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, + kobject_get(&port->cm_dev->dev_obj), + "%d", port->port_num); + if (ret) { + kfree(port); + return ret; + } + + for (i = 0; i < CM_COUNTER_GROUPS; i++) { + ret = kobject_init_and_add(&port->counter_group[i].obj, + &cm_counter_obj_type, + kobject_get(&port->port_obj), + "%s", counter_group_names[i]); + if (ret) + goto error; + } + + return 0; + +error: + while (i--) + cm_remove_fs_obj(&port->counter_group[i].obj); + cm_remove_fs_obj(&port->port_obj); + return ret; + +} + +static void cm_remove_port_fs(struct cm_port *port) +{ + int i; + + for (i = 0; i < CM_COUNTER_GROUPS; i++) + cm_remove_fs_obj(&port->counter_group[i].obj); + + cm_remove_fs_obj(&port->port_obj); +} + static void cm_add_one(struct ib_device *device) { struct cm_device *cm_dev; @@ -3397,7 +3681,7 @@ static void cm_add_one(struct ib_device *device) if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; - cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * + cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) * device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) return; @@ -3405,11 +3689,27 @@ static void cm_add_one(struct ib_device *device) cm_dev->device = device; cm_get_ack_delay(cm_dev); + ret = kobject_init_and_add(&cm_dev->dev_obj, &cm_dev_obj_type, + &cm_class.subsys.kobj, "%s", device->name); + if (ret) { + kfree(cm_dev); + return; + } + set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= device->phys_port_cnt; i++) { - port = &cm_dev->port[i-1]; + port = kzalloc(sizeof *port, GFP_KERNEL); + if (!port) + goto error1; + + cm_dev->port[i-1] = port; port->cm_dev = cm_dev; port->port_num = i; + + ret = cm_create_port_fs(port); + if (ret) + goto error1; + port->mad_agent = ib_register_mad_agent(device, i, IB_QPT_GSI, ®_req, @@ -3418,11 +3718,11 @@ static void cm_add_one(struct ib_device *device) cm_recv_handler, port); if (IS_ERR(port->mad_agent)) - goto error1; + goto error2; ret = ib_modify_port(device, i, 0, &port_modify); if (ret) - goto error2; + goto error3; } ib_set_client_data(device, &cm_client, cm_dev); @@ -3431,17 +3731,20 @@ static void cm_add_one(struct ib_device *device) write_unlock_irqrestore(&cm.device_lock, flags); return; -error2: +error3: ib_unregister_mad_agent(port->mad_agent); +error2: + cm_remove_port_fs(port); error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; while (--i) { - port = &cm_dev->port[i-1]; + port = cm_dev->port[i-1]; ib_modify_port(device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); + cm_remove_port_fs(port); } - kfree(cm_dev); + cm_remove_fs_obj(&cm_dev->dev_obj); } static void cm_remove_one(struct ib_device *device) @@ -3463,11 +3766,12 @@ static void cm_remove_one(struct ib_device *device) write_unlock_irqrestore(&cm.device_lock, flags); for (i = 1; i <= device->phys_port_cnt; i++) { - port = &cm_dev->port[i-1]; + port = cm_dev->port[i-1]; ib_modify_port(device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); + cm_remove_port_fs(port); } - kfree(cm_dev); + cm_remove_fs_obj(&cm_dev->dev_obj); } static int __init ib_cm_init(void) @@ -3488,17 +3792,25 @@ static int __init ib_cm_init(void) idr_pre_get(&cm.local_id_table, GFP_KERNEL); INIT_LIST_HEAD(&cm.timewait_list); - cm.wq = create_workqueue("ib_cm"); - if (!cm.wq) + ret = class_register(&cm_class); + if (ret) return -ENOMEM; + cm.wq = create_workqueue("ib_cm"); + if (!cm.wq) { + ret = -ENOMEM; + goto error1; + } + ret = ib_register_client(&cm_client); if (ret) - goto error; + goto error2; return 0; -error: +error2: destroy_workqueue(cm.wq); +error1: + class_unregister(&cm_class); return ret; } @@ -3519,6 +3831,7 @@ static void __exit ib_cm_cleanup(void) } ib_unregister_client(&cm_client); + class_unregister(&cm_class); idr_destroy(&cm.local_id_table); } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0751697ef984..1eff1b2c0e08 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -488,7 +488,8 @@ void rdma_destroy_qp(struct rdma_cm_id *id) } EXPORT_SYMBOL(rdma_destroy_qp); -static int cma_modify_qp_rtr(struct rdma_id_private *id_priv) +static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; @@ -514,13 +515,16 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv) if (ret) goto out; + if (conn_param) + qp_attr.max_dest_rd_atomic = conn_param->responder_resources; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); return ret; } -static int cma_modify_qp_rts(struct rdma_id_private *id_priv) +static int cma_modify_qp_rts(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; @@ -536,6 +540,8 @@ static int cma_modify_qp_rts(struct rdma_id_private *id_priv) if (ret) goto out; + if (conn_param) + qp_attr.max_rd_atomic = conn_param->initiator_depth; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); @@ -624,7 +630,8 @@ static inline int cma_zero_addr(struct sockaddr *addr) struct in6_addr *ip6; if (addr->sa_family == AF_INET) - return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr); + return ipv4_is_zeronet( + ((struct sockaddr_in *)addr)->sin_addr.s_addr); else { ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | @@ -634,7 +641,7 @@ static inline int cma_zero_addr(struct sockaddr *addr) static inline int cma_loopback_addr(struct sockaddr *addr) { - return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr); + return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); } static inline int cma_any_addr(struct sockaddr *addr) @@ -866,11 +873,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) { int ret; - ret = cma_modify_qp_rtr(id_priv); + ret = cma_modify_qp_rtr(id_priv, NULL); if (ret) goto reject; - ret = cma_modify_qp_rts(id_priv); + ret = cma_modify_qp_rts(id_priv, NULL); if (ret) goto reject; @@ -1122,8 +1129,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) cm_id->cm_handler = cma_ib_handler; ret = conn_id->id.event_handler(&conn_id->id, &event); - if (!ret) + if (!ret) { + cma_enable_remove(conn_id); goto out; + } /* Destroy the CM ID by returning a non-zero value. */ conn_id->cm_id.ib = NULL; @@ -1262,6 +1271,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct net_device *dev = NULL; struct rdma_cm_event event; int ret; + struct ib_device_attr attr; listen_id = cm_id->context; if (cma_disable_remove(listen_id, CMA_LISTEN)) @@ -1279,7 +1289,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, atomic_inc(&conn_id->dev_remove); conn_id->state = CMA_CONNECT; - dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr); + dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; cma_enable_remove(conn_id); @@ -1311,10 +1321,19 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; *sin = iw_event->remote_addr; + ret = ib_query_device(conn_id->id.device, &attr); + if (ret) { + cma_enable_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; + event.param.conn.initiator_depth = attr.max_qp_init_rd_atom; + event.param.conn.responder_resources = attr.max_qp_rd_atom; ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) { /* User wants to destroy the CM ID */ @@ -2272,7 +2291,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; cm_id->remote_addr = *sin; - ret = cma_modify_qp_rtr(id_priv); + ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) goto out; @@ -2335,25 +2354,15 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_rep_param rep; - struct ib_qp_attr qp_attr; - int qp_attr_mask, ret; - - if (id_priv->id.qp) { - ret = cma_modify_qp_rtr(id_priv); - if (ret) - goto out; + int ret; - qp_attr.qp_state = IB_QPS_RTS; - ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, &qp_attr, - &qp_attr_mask); - if (ret) - goto out; + ret = cma_modify_qp_rtr(id_priv, conn_param); + if (ret) + goto out; - qp_attr.max_rd_atomic = conn_param->initiator_depth; - ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); - if (ret) - goto out; - } + ret = cma_modify_qp_rts(id_priv, conn_param); + if (ret) + goto out; memset(&rep, 0, sizeof rep); rep.qp_num = id_priv->qp_num; @@ -2378,7 +2387,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv, struct iw_cm_conn_param iw_param; int ret; - ret = cma_modify_qp_rtr(id_priv); + ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) return ret; @@ -2598,11 +2607,9 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, /* IPv6 address is an SA assigned MGID. */ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else { - ip_ib_mc_map(sin->sin_addr.s_addr, mc_map); + ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ - mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8; - mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr); *mgid = *(union ib_gid *) (mc_map + 4); } } diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index e8d5f6b64998..7f00347364f7 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -139,7 +139,7 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, static void ib_fmr_batch_release(struct ib_fmr_pool *pool) { int ret; - struct ib_pool_fmr *fmr; + struct ib_pool_fmr *fmr, *next; LIST_HEAD(unmap_list); LIST_HEAD(fmr_list); @@ -158,6 +158,20 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) #endif } + /* + * The free_list may hold FMRs that have been put there + * because they haven't reached the max_remap count. + * Invalidate their mapping as well. + */ + list_for_each_entry_safe(fmr, next, &pool->free_list, list) { + if (fmr->remap_count == 0) + continue; + hlist_del_init(&fmr->cache_node); + fmr->remap_count = 0; + list_add_tail(&fmr->fmr->list, &fmr_list); + list_move(&fmr->list, &unmap_list); + } + list_splice(&pool->dirty_list, &unmap_list); INIT_LIST_HEAD(&pool->dirty_list); pool->dirty_len = 0; @@ -182,8 +196,7 @@ static int ib_fmr_cleanup_thread(void *pool_ptr) struct ib_fmr_pool *pool = pool_ptr; do { - if (pool->dirty_len >= pool->dirty_watermark || - atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) { + if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) { ib_fmr_batch_release(pool); atomic_inc(&pool->flush_ser); @@ -194,8 +207,7 @@ static int ib_fmr_cleanup_thread(void *pool_ptr) } set_current_state(TASK_INTERRUPTIBLE); - if (pool->dirty_len < pool->dirty_watermark && - atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 && + if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 && !kthread_should_stop()) schedule(); __set_current_state(TASK_RUNNING); @@ -308,10 +320,13 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, .max_maps = pool->max_remaps, .page_shift = params->page_shift }; + int bytes_per_fmr = sizeof *fmr; + + if (pool->cache_bucket) + bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64); for (i = 0; i < params->pool_size; ++i) { - fmr = kmalloc(sizeof *fmr + params->max_pages_per_fmr * sizeof (u64), - GFP_KERNEL); + fmr = kmalloc(bytes_per_fmr, GFP_KERNEL); if (!fmr) { printk(KERN_WARNING PFX "failed to allocate fmr " "struct for FMR %d\n", i); @@ -369,11 +384,6 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) i = 0; list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { - if (fmr->remap_count) { - INIT_LIST_HEAD(&fmr_list); - list_add_tail(&fmr->fmr->list, &fmr_list); - ib_unmap_fmr(&fmr_list); - } ib_dealloc_fmr(fmr->fmr); list_del(&fmr->list); kfree(fmr); @@ -511,8 +521,10 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) list_add_tail(&fmr->list, &pool->free_list); } else { list_add_tail(&fmr->list, &pool->dirty_list); - ++pool->dirty_len; - wake_up_process(pool->thread); + if (++pool->dirty_len >= pool->dirty_watermark) { + atomic_inc(&pool->req_ser); + wake_up_process(pool->thread); + } } } diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 6f4287716ab1..fbe16d5250a4 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -701,7 +701,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } /* Check to post send on QP or process locally */ - if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD) + if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && + smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) goto out; local = kmalloc(sizeof *local, GFP_ATOMIC); @@ -752,8 +753,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, port_priv = ib_get_mad_port(mad_agent_priv->agent.device, mad_agent_priv->agent.port_num); if (port_priv) { - mad_priv->mad.mad.mad_hdr.tid = - ((struct ib_mad *)smp)->mad_hdr.tid; + memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad)); recv_mad_agent = find_mad_agent(port_priv, &mad_priv->mad.mad); } @@ -1100,7 +1100,9 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; /* Timeout will be updated after send completes */ mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); - mad_send_wr->retries = send_buf->retries; + mad_send_wr->max_retries = send_buf->retries; + mad_send_wr->retries_left = send_buf->retries; + send_buf->retries = 0; /* Reference for work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; @@ -1931,15 +1933,6 @@ local: if (port_priv->device->process_mad) { int ret; - if (!response) { - printk(KERN_ERR PFX "No memory for response MAD\n"); - /* - * Is it better to assume that - * it wouldn't be processed ? - */ - goto out; - } - ret = port_priv->device->process_mad(port_priv->device, 0, port_priv->port_num, wc, &recv->grh, @@ -2282,8 +2275,6 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) /* Empty wait list to prevent receives from finding a request */ list_splice_init(&mad_agent_priv->wait_list, &cancel_list); - /* Empty local completion list as well */ - list_splice_init(&mad_agent_priv->local_list, &cancel_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Report all cancelled requests */ @@ -2445,9 +2436,12 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) { int ret; - if (!mad_send_wr->retries--) + if (!mad_send_wr->retries_left) return -ETIMEDOUT; + mad_send_wr->retries_left--; + mad_send_wr->send_buf.retries++; + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 9be5cc00a3a9..8b75010016ec 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -131,7 +131,8 @@ struct ib_mad_send_wr_private { struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; unsigned long timeout; - int retries; + int max_retries; + int retries_left; int retry; int refcount; enum ib_wc_status status; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index d43bc62005b3..a5e2a310f312 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -684,7 +684,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (seg_num > mad_send_wr->last_ack) { adjust_last_ack(mad_send_wr, seg_num); - mad_send_wr->retries = mad_send_wr->send_buf.retries; + mad_send_wr->retries_left = mad_send_wr->max_retries; } mad_send_wr->newwin = newwin; if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 1bc1fe605282..107f170c57cd 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -73,11 +73,20 @@ struct mcast_device { }; enum mcast_state { - MCAST_IDLE, MCAST_JOINING, MCAST_MEMBER, + MCAST_ERROR, +}; + +enum mcast_group_state { + MCAST_IDLE, MCAST_BUSY, - MCAST_ERROR + MCAST_GROUP_ERROR, + MCAST_PKEY_EVENT +}; + +enum { + MCAST_INVALID_PKEY_INDEX = 0xFFFF }; struct mcast_member; @@ -93,9 +102,10 @@ struct mcast_group { struct mcast_member *last_join; int members[3]; atomic_t refcount; - enum mcast_state state; + enum mcast_group_state state; struct ib_sa_query *query; int query_id; + u16 pkey_index; }; struct mcast_member { @@ -378,9 +388,19 @@ static int fail_join(struct mcast_group *group, struct mcast_member *member, static void process_group_error(struct mcast_group *group) { struct mcast_member *member; - int ret; + int ret = 0; + u16 pkey_index; + + if (group->state == MCAST_PKEY_EVENT) + ret = ib_find_pkey(group->port->dev->device, + group->port->port_num, + be16_to_cpu(group->rec.pkey), &pkey_index); spin_lock_irq(&group->lock); + if (group->state == MCAST_PKEY_EVENT && !ret && + group->pkey_index == pkey_index) + goto out; + while (!list_empty(&group->active_list)) { member = list_entry(group->active_list.next, struct mcast_member, list); @@ -399,6 +419,7 @@ static void process_group_error(struct mcast_group *group) } group->rec.join_state = 0; +out: group->state = MCAST_BUSY; spin_unlock_irq(&group->lock); } @@ -415,9 +436,9 @@ static void mcast_work_handler(struct work_struct *work) retest: spin_lock_irq(&group->lock); while (!list_empty(&group->pending_list) || - (group->state == MCAST_ERROR)) { + (group->state != MCAST_BUSY)) { - if (group->state == MCAST_ERROR) { + if (group->state != MCAST_BUSY) { spin_unlock_irq(&group->lock); process_group_error(group); goto retest; @@ -494,12 +515,19 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec, void *context) { struct mcast_group *group = context; + u16 pkey_index = MCAST_INVALID_PKEY_INDEX; if (status) process_join_error(group, status); else { + ib_find_pkey(group->port->dev->device, group->port->port_num, + be16_to_cpu(rec->pkey), &pkey_index); + spin_lock_irq(&group->port->lock); group->rec = *rec; + if (group->state == MCAST_BUSY && + group->pkey_index == MCAST_INVALID_PKEY_INDEX) + group->pkey_index = pkey_index; if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) { rb_erase(&group->node, &group->port->table); mcast_insert(group->port, group, 1); @@ -539,6 +567,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port, group->port = port; group->rec.mgid = *mgid; + group->pkey_index = MCAST_INVALID_PKEY_INDEX; INIT_LIST_HEAD(&group->pending_list); INIT_LIST_HEAD(&group->active_list); INIT_WORK(&group->work, mcast_work_handler); @@ -707,7 +736,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(ib_init_ah_from_mcmember); -static void mcast_groups_lost(struct mcast_port *port) +static void mcast_groups_event(struct mcast_port *port, + enum mcast_group_state state) { struct mcast_group *group; struct rb_node *node; @@ -721,7 +751,8 @@ static void mcast_groups_lost(struct mcast_port *port) atomic_inc(&group->refcount); queue_work(mcast_wq, &group->work); } - group->state = MCAST_ERROR; + if (group->state != MCAST_GROUP_ERROR) + group->state = state; spin_unlock(&group->lock); } spin_unlock_irqrestore(&port->lock, flags); @@ -731,16 +762,20 @@ static void mcast_event_handler(struct ib_event_handler *handler, struct ib_event *event) { struct mcast_device *dev; + int index; dev = container_of(handler, struct mcast_device, event_handler); + index = event->element.port_num - dev->start_port; switch (event->event) { case IB_EVENT_PORT_ERR: case IB_EVENT_LID_CHANGE: case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: - mcast_groups_lost(&dev->port[event->element.port_num - - dev->start_port]); + mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR); + break; + case IB_EVENT_PKEY_CHANGE: + mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT); break; default: break; diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index 1cfc2984434f..aff96bac49b4 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -59,7 +59,8 @@ extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, u8 node_type, int port_num); /* - * Return 1 if the SMP should be handled by the local SMA/SM via process_mad + * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM + * via process_mad */ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp, struct ib_device *device) @@ -71,4 +72,19 @@ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp, (smp->hop_ptr == smp->hop_cnt + 1)) ? IB_SMI_HANDLE : IB_SMI_DISCARD); } + +/* + * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM + * via process_mad + */ +static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp, + struct ib_device *device) +{ + /* C14-13:3 -- We're at the end of the DR segment of path */ + /* C14-13:4 -- Hop Pointer == 0 -> give to SM */ + return ((device->process_mad && + ib_get_smp_direction(smp) && + !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD); +} + #endif /* __SMI_H_ */ diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 3d4050681325..c864ef70fdf9 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -508,19 +508,10 @@ static int add_port(struct ib_device *device, int port_num) p->ibdev = device; p->port_num = port_num; - p->kobj.ktype = &port_type; - p->kobj.parent = kobject_get(&device->ports_parent); - if (!p->kobj.parent) { - ret = -EBUSY; - goto err; - } - - ret = kobject_set_name(&p->kobj, "%d", port_num); - if (ret) - goto err_put; - - ret = kobject_register(&p->kobj); + ret = kobject_init_and_add(&p->kobj, &port_type, + kobject_get(device->ports_parent), + "%d", port_num); if (ret) goto err_put; @@ -549,6 +540,7 @@ static int add_port(struct ib_device *device, int port_num) list_add_tail(&p->kobj.entry, &device->port_list); + kobject_uevent(&p->kobj, KOBJ_ADD); return 0; err_free_pkey: @@ -570,9 +562,7 @@ err_remove_pma: sysfs_remove_group(&p->kobj, &pma_group); err_put: - kobject_put(&device->ports_parent); - -err: + kobject_put(device->ports_parent); kfree(p); return ret; } @@ -694,16 +684,9 @@ int ib_device_register_sysfs(struct ib_device *device) goto err_unregister; } - device->ports_parent.parent = kobject_get(&class_dev->kobj); - if (!device->ports_parent.parent) { - ret = -EBUSY; - goto err_unregister; - } - ret = kobject_set_name(&device->ports_parent, "ports"); - if (ret) - goto err_put; - ret = kobject_register(&device->ports_parent); - if (ret) + device->ports_parent = kobject_create_and_add("ports", + kobject_get(&class_dev->kobj)); + if (!device->ports_parent) goto err_put; if (device->node_type == RDMA_NODE_IB_SWITCH) { @@ -731,7 +714,7 @@ err_put: sysfs_remove_group(p, &pma_group); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); - kobject_unregister(p); + kobject_put(p); } } @@ -755,10 +738,10 @@ void ib_device_unregister_sysfs(struct ib_device *device) sysfs_remove_group(p, &pma_group); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); - kobject_unregister(p); + kobject_put(p); } - kobject_unregister(&device->ports_parent); + kobject_put(device->ports_parent); class_device_unregister(&device->class_dev); } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 424983f5b1ee..4291ab42a5b9 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -106,6 +106,9 @@ enum { IB_UCM_MAX_DEVICES = 32 }; +/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */ +extern struct class cm_class; + #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) static void ib_ucm_add_one(struct ib_device *device); @@ -1199,7 +1202,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } -static void ib_ucm_release_class_dev(struct class_device *class_dev) +static void ucm_release_class_dev(struct class_device *class_dev) { struct ib_ucm_device *dev; @@ -1217,11 +1220,6 @@ static const struct file_operations ucm_fops = { .poll = ib_ucm_poll, }; -static struct class ucm_class = { - .name = "infiniband_cm", - .release = ib_ucm_release_class_dev -}; - static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_ucm_device *dev; @@ -1257,9 +1255,10 @@ static void ib_ucm_add_one(struct ib_device *device) if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) goto err; - ucm_dev->class_dev.class = &ucm_class; + ucm_dev->class_dev.class = &cm_class; ucm_dev->class_dev.dev = device->dma_device; ucm_dev->class_dev.devt = ucm_dev->dev.dev; + ucm_dev->class_dev.release = ucm_release_class_dev; snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d", ucm_dev->devnum); if (class_device_register(&ucm_dev->class_dev)) @@ -1306,40 +1305,34 @@ static int __init ib_ucm_init(void) "infiniband_cm"); if (ret) { printk(KERN_ERR "ucm: couldn't register device number\n"); - goto err; + goto error1; } - ret = class_register(&ucm_class); - if (ret) { - printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n"); - goto err_chrdev; - } - - ret = class_create_file(&ucm_class, &class_attr_abi_version); + ret = class_create_file(&cm_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); - goto err_class; + goto error2; } ret = ib_register_client(&ucm_client); if (ret) { printk(KERN_ERR "ucm: couldn't register client\n"); - goto err_class; + goto error3; } return 0; -err_class: - class_unregister(&ucm_class); -err_chrdev: +error3: + class_remove_file(&cm_class, &class_attr_abi_version); +error2: unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); -err: +error1: return ret; } static void __exit ib_ucm_cleanup(void) { ib_unregister_client(&ucm_client); - class_unregister(&ucm_class); + class_remove_file(&cm_class, &class_attr_abi_version); unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); idr_destroy(&ctx_id_table); } diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 90d675ad9ec8..15937eb38aae 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -31,6 +31,7 @@ */ #include <linux/completion.h> +#include <linux/file.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/idr.h> @@ -991,6 +992,96 @@ out: return ret; } +static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) +{ + /* Acquire mutex's based on pointer comparison to prevent deadlock. */ + if (file1 < file2) { + mutex_lock(&file1->mut); + mutex_lock(&file2->mut); + } else { + mutex_lock(&file2->mut); + mutex_lock(&file1->mut); + } +} + +static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2) +{ + if (file1 < file2) { + mutex_unlock(&file2->mut); + mutex_unlock(&file1->mut); + } else { + mutex_unlock(&file1->mut); + mutex_unlock(&file2->mut); + } +} + +static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file) +{ + struct ucma_event *uevent, *tmp; + + list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) + if (uevent->ctx == ctx) + list_move_tail(&uevent->list, &file->event_list); +} + +static ssize_t ucma_migrate_id(struct ucma_file *new_file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_migrate_id cmd; + struct rdma_ucm_migrate_resp resp; + struct ucma_context *ctx; + struct file *filp; + struct ucma_file *cur_file; + int ret = 0; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + /* Get current fd to protect against it being closed */ + filp = fget(cmd.fd); + if (!filp) + return -ENOENT; + + /* Validate current fd and prevent destruction of id. */ + ctx = ucma_get_ctx(filp->private_data, cmd.id); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto file_put; + } + + cur_file = ctx->file; + if (cur_file == new_file) { + resp.events_reported = ctx->events_reported; + goto response; + } + + /* + * Migrate events between fd's, maintaining order, and avoiding new + * events being added before existing events. + */ + ucma_lock_files(cur_file, new_file); + mutex_lock(&mut); + + list_move_tail(&ctx->list, &new_file->ctx_list); + ucma_move_events(ctx, new_file); + ctx->file = new_file; + resp.events_reported = ctx->events_reported; + + mutex_unlock(&mut); + ucma_unlock_files(cur_file, new_file); + +response: + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + + ucma_put_ctx(ctx); +file_put: + fput(filp); + return ret; +} + static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) = { @@ -1012,6 +1103,7 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, + [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id }; static ssize_t ucma_write(struct file *filp, const char __user *buf, diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index b53eac4611de..4e915104ac4c 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -2,6 +2,7 @@ * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008 Cisco. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -42,7 +43,7 @@ #include <linux/cdev.h> #include <linux/dma-mapping.h> #include <linux/poll.h> -#include <linux/rwsem.h> +#include <linux/mutex.h> #include <linux/kref.h> #include <linux/compat.h> @@ -94,7 +95,7 @@ struct ib_umad_port { struct class_device *sm_class_dev; struct semaphore sm_sem; - struct rw_semaphore mutex; + struct mutex file_mutex; struct list_head file_list; struct ib_device *ib_dev; @@ -110,11 +111,11 @@ struct ib_umad_device { }; struct ib_umad_file { + struct mutex mutex; struct ib_umad_port *port; struct list_head recv_list; struct list_head send_list; struct list_head port_list; - spinlock_t recv_lock; spinlock_t send_lock; wait_queue_head_t recv_wait; struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; @@ -156,7 +157,7 @@ static int hdr_size(struct ib_umad_file *file) sizeof (struct ib_user_mad_hdr_old); } -/* caller must hold port->mutex at least for reading */ +/* caller must hold file->mutex */ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) { return file->agents_dead ? NULL : file->agent[id]; @@ -168,32 +169,30 @@ static int queue_packet(struct ib_umad_file *file, { int ret = 1; - down_read(&file->port->mutex); + mutex_lock(&file->mutex); for (packet->mad.hdr.id = 0; packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; packet->mad.hdr.id++) if (agent == __get_agent(file, packet->mad.hdr.id)) { - spin_lock_irq(&file->recv_lock); list_add_tail(&packet->list, &file->recv_list); - spin_unlock_irq(&file->recv_lock); wake_up_interruptible(&file->recv_wait); ret = 0; break; } - up_read(&file->port->mutex); + mutex_unlock(&file->mutex); return ret; } static void dequeue_send(struct ib_umad_file *file, struct ib_umad_packet *packet) - { +{ spin_lock_irq(&file->send_lock); list_del(&packet->list); spin_unlock_irq(&file->send_lock); - } +} static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) @@ -341,10 +340,10 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, if (count < hdr_size(file)) return -EINVAL; - spin_lock_irq(&file->recv_lock); + mutex_lock(&file->mutex); while (list_empty(&file->recv_list)) { - spin_unlock_irq(&file->recv_lock); + mutex_unlock(&file->mutex); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; @@ -353,13 +352,13 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, !list_empty(&file->recv_list))) return -ERESTARTSYS; - spin_lock_irq(&file->recv_lock); + mutex_lock(&file->mutex); } packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); - spin_unlock_irq(&file->recv_lock); + mutex_unlock(&file->mutex); if (packet->recv_wc) ret = copy_recv_mad(file, buf, packet, count); @@ -368,9 +367,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, if (ret < 0) { /* Requeue packet */ - spin_lock_irq(&file->recv_lock); + mutex_lock(&file->mutex); list_add(&packet->list, &file->recv_list); - spin_unlock_irq(&file->recv_lock); + mutex_unlock(&file->mutex); } else { if (packet->recv_wc) ib_free_recv_mad(packet->recv_wc); @@ -481,7 +480,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err; } - down_read(&file->port->mutex); + mutex_lock(&file->mutex); agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { @@ -577,7 +576,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (ret) goto err_send; - up_read(&file->port->mutex); + mutex_unlock(&file->mutex); return count; err_send: @@ -587,7 +586,7 @@ err_msg: err_ah: ib_destroy_ah(ah); err_up: - up_read(&file->port->mutex); + mutex_unlock(&file->mutex); err: kfree(packet); return ret; @@ -613,11 +612,12 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, { struct ib_user_mad_reg_req ureq; struct ib_mad_reg_req req; - struct ib_mad_agent *agent; + struct ib_mad_agent *agent = NULL; int agent_id; int ret; - down_write(&file->port->mutex); + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); if (!file->port->ib_dev) { ret = -EPIPE; @@ -666,13 +666,13 @@ found: send_handler, recv_handler, file); if (IS_ERR(agent)) { ret = PTR_ERR(agent); + agent = NULL; goto out; } if (put_user(agent_id, (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) { ret = -EFAULT; - ib_unregister_mad_agent(agent); goto out; } @@ -690,7 +690,13 @@ found: ret = 0; out: - up_write(&file->port->mutex); + mutex_unlock(&file->mutex); + + if (ret && agent) + ib_unregister_mad_agent(agent); + + mutex_unlock(&file->port->file_mutex); + return ret; } @@ -703,7 +709,8 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) if (get_user(id, arg)) return -EFAULT; - down_write(&file->port->mutex); + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; @@ -714,11 +721,13 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) file->agent[id] = NULL; out: - up_write(&file->port->mutex); + mutex_unlock(&file->mutex); if (agent) ib_unregister_mad_agent(agent); + mutex_unlock(&file->port->file_mutex); + return ret; } @@ -726,12 +735,12 @@ static long ib_umad_enable_pkey(struct ib_umad_file *file) { int ret = 0; - down_write(&file->port->mutex); + mutex_lock(&file->mutex); if (file->already_used) ret = -EINVAL; else file->use_pkey_index = 1; - up_write(&file->port->mutex); + mutex_unlock(&file->mutex); return ret; } @@ -783,7 +792,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp) if (!port) return -ENXIO; - down_write(&port->mutex); + mutex_lock(&port->file_mutex); if (!port->ib_dev) { ret = -ENXIO; @@ -797,7 +806,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp) goto out; } - spin_lock_init(&file->recv_lock); + mutex_init(&file->mutex); spin_lock_init(&file->send_lock); INIT_LIST_HEAD(&file->recv_list); INIT_LIST_HEAD(&file->send_list); @@ -809,7 +818,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp) list_add_tail(&file->port_list, &port->file_list); out: - up_write(&port->mutex); + mutex_unlock(&port->file_mutex); return ret; } @@ -821,7 +830,8 @@ static int ib_umad_close(struct inode *inode, struct file *filp) int already_dead; int i; - down_write(&file->port->mutex); + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); already_dead = file->agents_dead; file->agents_dead = 1; @@ -834,14 +844,14 @@ static int ib_umad_close(struct inode *inode, struct file *filp) list_del(&file->port_list); - downgrade_write(&file->port->mutex); + mutex_unlock(&file->mutex); if (!already_dead) for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) if (file->agent[i]) ib_unregister_mad_agent(file->agent[i]); - up_read(&file->port->mutex); + mutex_unlock(&file->port->file_mutex); kfree(file); kref_put(&dev->ref, ib_umad_release_dev); @@ -914,10 +924,10 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) }; int ret = 0; - down_write(&port->mutex); + mutex_lock(&port->file_mutex); if (port->ib_dev) ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); - up_write(&port->mutex); + mutex_unlock(&port->file_mutex); up(&port->sm_sem); @@ -981,7 +991,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, port->ib_dev = device; port->port_num = port_num; init_MUTEX(&port->sm_sem); - init_rwsem(&port->mutex); + mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); port->dev = cdev_alloc(); @@ -1052,6 +1062,7 @@ err_cdev: static void ib_umad_kill_port(struct ib_umad_port *port) { struct ib_umad_file *file; + int already_dead; int id; class_set_devdata(port->class_dev, NULL); @@ -1067,42 +1078,22 @@ static void ib_umad_kill_port(struct ib_umad_port *port) umad_port[port->dev_num] = NULL; spin_unlock(&port_lock); - down_write(&port->mutex); + mutex_lock(&port->file_mutex); port->ib_dev = NULL; - /* - * Now go through the list of files attached to this port and - * unregister all of their MAD agents. We need to hold - * port->mutex while doing this to avoid racing with - * ib_umad_close(), but we can't hold the mutex for writing - * while calling ib_unregister_mad_agent(), since that might - * deadlock by calling back into queue_packet(). So we - * downgrade our lock to a read lock, and then drop and - * reacquire the write lock for the next iteration. - * - * We do list_del_init() on the file's list_head so that the - * list_del in ib_umad_close() is still OK, even after the - * file is removed from the list. - */ - while (!list_empty(&port->file_list)) { - file = list_entry(port->file_list.next, struct ib_umad_file, - port_list); - + list_for_each_entry(file, &port->file_list, port_list) { + mutex_lock(&file->mutex); + already_dead = file->agents_dead; file->agents_dead = 1; - list_del_init(&file->port_list); - - downgrade_write(&port->mutex); + mutex_unlock(&file->mutex); for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) if (file->agent[id]) ib_unregister_mad_agent(file->agent[id]); - - up_read(&port->mutex); - down_write(&port->mutex); } - up_write(&port->mutex); + mutex_unlock(&port->file_mutex); clear_bit(port->dev_num, dev_map); } diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile index 36b98989b15e..7e7b5a66f042 100644 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ b/drivers/infiniband/hw/cxgb3/Makefile @@ -1,5 +1,4 @@ -EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3 \ - -I$(TOPDIR)/drivers/infiniband/hw/cxgb3/core +EXTRA_CFLAGS += -Idrivers/net/cxgb3 obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index eec6a30840ca..03c5ff62889a 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -179,7 +179,7 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) setup.size = 1UL << cq->size_log2; setup.credits = 65535; setup.credit_thres = 1; - if (rdev_p->t3cdev_p->type == T3B) + if (rdev_p->t3cdev_p->type != T3A) setup.ovfl_mode = 0; else setup.ovfl_mode = 1; @@ -584,7 +584,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, { u32 i, nr_wqe, copy_len; u8 *copy_data; - u8 wr_len, utx_len; /* lenght in 8 byte flit */ + u8 wr_len, utx_len; /* length in 8 byte flit */ enum t3_wr_flags flag; __be64 *wqe; u64 utx_cmd; diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index c84d4ac49355..969d4d928455 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -315,7 +315,7 @@ struct t3_rdma_init_wr { __be32 ird; __be64 qp_dma_addr; /* 7 */ __be32 qp_dma_size; /* 8 */ - u32 irs; + __be32 irs; }; struct t3_genbit { @@ -324,7 +324,8 @@ struct t3_genbit { }; enum rdma_init_wr_flags { - RECVS_POSTED = 1, + RECVS_POSTED = (1<<0), + PRIV_QP = (1<<1), }; union t3_wr { diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 20ba372dd182..e9a08fa3dffe 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -332,7 +332,7 @@ static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip, } }; - if (ip_route_output_flow(&rt, &fl, NULL, 0)) + if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0)) return NULL; return rt; } @@ -1118,7 +1118,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) status2errno(rpl->status)); connect_reply_upcall(ep, status2errno(rpl->status)); state_set(&ep->com, DEAD); - if (ep->com.tdev->type == T3B && act_open_has_tid(rpl->status)) + if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status)) release_tid(ep->com.tdev, GET_TID(rpl), NULL); cxgb3_free_atid(ep->com.tdev, ep->atid); dst_release(ep->dst); @@ -1249,7 +1249,7 @@ static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, skb_trim(skb, sizeof(struct cpl_tid_release)); skb_get(skb); - if (tdev->type == T3B) + if (tdev->type != T3A) release_tid(tdev, hwtid, skb); else { struct cpl_pass_accept_rpl *rpl; diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index a6c2c4ba29e6..73bfd1656f86 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -122,6 +122,13 @@ int build_phys_page_list(struct ib_phys_buf *buffer_list, *total_size += buffer_list[i].size; if (i > 0) mask |= buffer_list[i].addr; + else + mask |= buffer_list[i].addr & PAGE_MASK; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; + else + mask |= (buffer_list[i].addr + buffer_list[i].size + + PAGE_SIZE - 1) & PAGE_MASK; } if (*total_size > 0xFFFFFFFFULL) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index b5436ca92e68..df1838f8f94d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -39,6 +39,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/ethtool.h> +#include <linux/rtnetlink.h> #include <asm/io.h> #include <asm/irq.h> @@ -645,7 +646,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err) goto err; - if (udata && t3b_device(rhp)) { + if (udata && !t3a_device(rhp)) { uresp.pbl_addr = (mhp->attr.pbl_addr - rhp->rdev.rnic_info.pbl_base) >> 3; PDBG("%s user resp pbl_addr 0x%x\n", __FUNCTION__, @@ -1053,7 +1054,9 @@ static ssize_t show_fw_ver(struct class_device *cdev, char *buf) struct net_device *lldev = dev->rdev.t3cdev_p->lldev; PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev); + rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); return sprintf(buf, "%s\n", info.fw_version); } @@ -1065,7 +1068,9 @@ static ssize_t show_hca(struct class_device *cdev, char *buf) struct net_device *lldev = dev->rdev.t3cdev_p->lldev; PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev); + rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); return sprintf(buf, "%s\n", info.driver); } diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index dd89b6b91f9c..ea2cdd73dd85 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -208,36 +208,19 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, struct ib_recv_wr *wr) { - int i, err = 0; - u32 pbl_addr[4]; - u8 page_size[4]; + int i; if (wr->num_sge > T3_MAX_SGE) return -EINVAL; - err = iwch_sgl2pbl_map(rhp, wr->sg_list, wr->num_sge, pbl_addr, - page_size); - if (err) - return err; - wqe->recv.pagesz[0] = page_size[0]; - wqe->recv.pagesz[1] = page_size[1]; - wqe->recv.pagesz[2] = page_size[2]; - wqe->recv.pagesz[3] = page_size[3]; wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); for (i = 0; i < wr->num_sge; i++) { wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - - /* to in the WQE == the offset into the page */ - wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) % - (1UL << (12 + page_size[i]))); - - /* pbl_addr is the adapters address in the PBL */ - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); + wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); } for (; i < T3_MAX_SGE; i++) { wqe->recv.sgl[i].stag = 0; wqe->recv.sgl[i].len = 0; wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; } return 0; } @@ -659,6 +642,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) cxio_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, *flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); /* locking heirarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, *flag); @@ -668,6 +652,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) cxio_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, *flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); /* deref */ if (atomic_dec_and_test(&qhp->refcnt)) @@ -678,7 +663,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) { - if (t3b_device(qhp->rhp)) + if (qhp->ibqp.uobject) cxio_set_wq_in_error(&qhp->wq); else __flush_qp(qhp, flag); @@ -732,6 +717,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, init_attr.qp_dma_addr = qhp->wq.dma_addr; init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0; + init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0; init_attr.irs = qhp->ep->rcv_seq; PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " "flags 0x%x qpcaps 0x%x\n", __FUNCTION__, @@ -847,10 +833,11 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, disconnect = 1; ep = qhp->ep; } + flush_qp(qhp, &flag); break; case IWCH_QP_STATE_TERMINATE: qhp->attr.state = IWCH_QP_STATE_TERMINATE; - if (t3b_device(qhp->rhp)) + if (qhp->ibqp.uobject) cxio_set_wq_in_error(&qhp->wq); if (!internal) terminate = 1; diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 453eb995c1d4..194c1c30cf63 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -1,7 +1,7 @@ /* * IBM eServer eHCA Infiniband device driver for Linux on POWER * - * adress vector functions + * address vector functions * * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com> * Khadija Souissi <souissik@de.ibm.com> @@ -76,8 +76,12 @@ int ehca_calc_ipd(struct ehca_shca *shca, int port, link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; - /* IPD = round((link / path) - 1) */ - *ipd = ((link + (path >> 1)) / path) - 1; + if (path >= link) + /* no need to throttle if path faster than link */ + *ipd = 0; + else + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 87f12d4312a7..92cce8aacbb7 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -94,9 +94,14 @@ struct ehca_sma_attr { struct ehca_sport { struct ib_cq *ibcq_aqp1; - struct ib_qp *ibqp_aqp1; + struct ib_qp *ibqp_sqp[2]; + /* lock to serialze modify_qp() calls for sqp in normal + * and irq path (when event PORT_ACTIVE is received first time) + */ + spinlock_t mod_sqp_lock; enum ib_port_state port_state; struct ehca_sma_attr saved_attr; + u32 pma_qp_nr; }; #define HCA_CAP_MR_PGSIZE_4K 0x80000000 @@ -141,6 +146,14 @@ enum ehca_ext_qp_type { EQPT_SRQ = 3, }; +/* struct to cache modify_qp()'s parms for GSI/SMI qp */ +struct ehca_mod_qp_parm { + int mask; + struct ib_qp_attr attr; +}; + +#define EHCA_MOD_QP_PARM_MAX 4 + struct ehca_qp { union { struct ib_qp ib_qp; @@ -164,10 +177,18 @@ struct ehca_qp { struct ehca_cq *recv_cq; unsigned int sqerr_purgeflag; struct hlist_node list_entries; + /* array to cache modify_qp()'s parms for GSI/SMI qp */ + struct ehca_mod_qp_parm *mod_qp_parm; + int mod_qp_parm_idx; /* mmap counter for resources mapped into user space */ u32 mm_count_squeue; u32 mm_count_rqueue; u32 mm_count_galpa; + /* unsolicited ack circumvention */ + int unsol_ack_circ; + int mtu_shift; + u32 message_count; + u32 packet_count; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) @@ -322,6 +343,8 @@ extern int ehca_static_rate; extern int ehca_port_act_time; extern int ehca_use_hp_mr; extern int ehca_scaling_code; +extern int ehca_lock_hcalls; +extern int ehca_nr_ports; struct ipzu_queue_resp { u32 qe_size; /* queue entry size */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 79c25f51c21e..0467c158d4a9 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -246,7 +246,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, } else { if (h_ret != H_PAGE_REGISTERED) { ehca_err(device, "Registration of page failed " - "ehca_cq=%p cq_num=%x h_ret=%li" + "ehca_cq=%p cq_num=%x h_ret=%li " "counter=%i act_pages=%i", my_cq, my_cq->cq_number, h_ret, counter, param.act_pages); diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 3f617b27b954..b5ca94c6b8d9 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -62,6 +62,7 @@ #define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) #define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) #define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) +#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) #define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) #define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) @@ -354,17 +355,34 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) { u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + u8 spec_event; + struct ehca_sport *sport = &shca->sport[port - 1]; + unsigned long flags; switch (ec) { case 0x30: /* port availability change */ if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + int suppress_event; + /* replay modify_qp for sqps */ + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + suppress_event = !sport->ibqp_sqp[IB_QPT_GSI]; + if (sport->ibqp_sqp[IB_QPT_SMI]) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); + if (!suppress_event) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + + /* AQP1 was destroyed, ignore this event */ + if (suppress_event) + break; + + sport->port_state = IB_PORT_ACTIVE; dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, "is active"); ehca_query_sma_attr(shca, port, - &shca->sport[port - 1].saved_attr); + &sport->saved_attr); } else { - shca->sport[port - 1].port_state = IB_PORT_DOWN; + sport->port_state = IB_PORT_DOWN; dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, "is inactive"); } @@ -378,13 +396,15 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) ehca_warn(&shca->ib_device, "disruptive port " "%d configuration change", port); - shca->sport[port - 1].port_state = IB_PORT_DOWN; + sport->port_state = IB_PORT_DOWN; dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, "is inactive"); - shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + sport->port_state = IB_PORT_ACTIVE; dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, "is active"); + ehca_query_sma_attr(shca, port, + &sport->saved_attr); } else notify_port_conf_change(shca, port); break; @@ -394,6 +414,16 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) case 0x33: /* trace stopped */ ehca_err(&shca->ib_device, "Traced stopped."); break; + case 0x34: /* util async event */ + spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); + if (spec_event == 0x80) /* client reregister required */ + dispatch_port_event(shca, port, + IB_EVENT_CLIENT_REREGISTER, + "client reregister req."); + else + ehca_warn(&shca->ib_device, "Unknown util async " + "event %x on port %x", spec_event, port); + break; default: ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", ec, shca->ib_device.name); diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 5485799cdc8d..a8a2ea585d2f 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -187,6 +187,11 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context); int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); + void ehca_poll_eqs(unsigned long data); int ehca_calc_ipd(struct ehca_shca *shca, int port, @@ -200,4 +205,6 @@ void ehca_free_fw_ctrlblock(void *ptr); #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif +void ehca_recover_sqp(struct ib_qp *sqp); + #endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 90d4334179bf..a86ebcc79a95 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -43,13 +43,14 @@ #ifdef CONFIG_PPC_64K_PAGES #include <linux/slab.h> #endif + #include "ehca_classes.h" #include "ehca_iverbs.h" #include "ehca_mrmw.h" #include "ehca_tools.h" #include "hcp_if.h" -#define HCAD_VERSION "0024" +#define HCAD_VERSION "0025" MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); @@ -66,6 +67,7 @@ int ehca_poll_all_eqs = 1; int ehca_static_rate = -1; int ehca_scaling_code = 0; int ehca_mr_largepage = 1; +int ehca_lock_hcalls = -1; module_param_named(open_aqp1, ehca_open_aqp1, int, S_IRUGO); module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); @@ -77,6 +79,7 @@ module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, S_IRUGO); module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); module_param_named(scaling_code, ehca_scaling_code, int, S_IRUGO); module_param_named(mr_largepage, ehca_mr_largepage, int, S_IRUGO); +module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); MODULE_PARM_DESC(open_aqp1, "AQP1 on startup (0: no (default), 1: yes)"); @@ -87,7 +90,8 @@ MODULE_PARM_DESC(hw_level, "hardware level" " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)"); MODULE_PARM_DESC(nr_ports, - "number of connected ports (default: 2)"); + "number of connected ports (-1: autodetect, 1: port one only, " + "2: two ports (default)"); MODULE_PARM_DESC(use_hp_mr, "high performance MRs (0: no (default), 1: yes)"); MODULE_PARM_DESC(port_act_time, @@ -102,6 +106,9 @@ MODULE_PARM_DESC(scaling_code, MODULE_PARM_DESC(mr_largepage, "use large page for MR (0: use PAGE_SIZE (default), " "1: use large page depending on MR size"); +MODULE_PARM_DESC(lock_hcalls, + "serialize all hCalls made by the driver " + "(default: autodetect)"); DEFINE_RWLOCK(ehca_qp_idr_lock); DEFINE_RWLOCK(ehca_cq_idr_lock); @@ -258,6 +265,7 @@ static struct cap_descr { { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, + { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" }, }; static int ehca_sense_attributes(struct ehca_shca *shca) @@ -333,6 +341,12 @@ static int ehca_sense_attributes(struct ehca_shca *shca) if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) ehca_gen_dbg(" %s", hca_cap_descr[i].descr); + /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is + * a firmware property, so it's valid across all adapters + */ + if (ehca_lock_hcalls == -1) + ehca_lock_hcalls = !(shca->hca_cap & HCA_CAP_H_ALLOC_RES_SYNC); + /* translate supported MR page sizes; always support 4K */ shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; if (ehca_mr_largepage) { /* support extra sizes only if enabled */ @@ -458,7 +472,7 @@ int ehca_init_device(struct ehca_shca *shca) shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; shca->ib_device.attach_mcast = ehca_attach_mcast; shca->ib_device.detach_mcast = ehca_detach_mcast; - /* shca->ib_device.process_mad = ehca_process_mad; */ + shca->ib_device.process_mad = ehca_process_mad; shca->ib_device.mmap = ehca_mmap; if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { @@ -498,7 +512,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) } sport->ibcq_aqp1 = ibcq; - if (sport->ibqp_aqp1) { + if (sport->ibqp_sqp[IB_QPT_GSI]) { ehca_err(&shca->ib_device, "AQP1 QP is already created."); ret = -EPERM; goto create_aqp1; @@ -524,7 +538,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) ret = PTR_ERR(ibqp); goto create_aqp1; } - sport->ibqp_aqp1 = ibqp; + sport->ibqp_sqp[IB_QPT_GSI] = ibqp; return 0; @@ -537,7 +551,7 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport) { int ret; - ret = ib_destroy_qp(sport->ibqp_aqp1); + ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]); if (ret) { ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); return ret; @@ -577,6 +591,11 @@ static struct attribute_group ehca_drv_attr_grp = { .attrs = ehca_drv_attrs }; +static struct attribute_group *ehca_drv_attr_groups[] = { + &ehca_drv_attr_grp, + NULL, +}; + #define EHCA_RESOURCE_ATTR(name) \ static ssize_t ehca_show_##name(struct device *dev, \ struct device_attribute *attr, \ @@ -675,7 +694,7 @@ static int __devinit ehca_probe(struct of_device *dev, struct ehca_shca *shca; const u64 *handle; struct ib_pd *ibpd; - int ret; + int ret, i; handle = of_get_property(dev->node, "ibm,hca-handle", NULL); if (!handle) { @@ -696,6 +715,8 @@ static int __devinit ehca_probe(struct of_device *dev, return -ENOMEM; } mutex_init(&shca->modify_mutex); + for (i = 0; i < ARRAY_SIZE(shca->sport); i++) + spin_lock_init(&shca->sport[i].mod_sqp_lock); shca->ofdev = dev; shca->ipz_hca_handle.handle = *handle; @@ -886,6 +907,9 @@ static struct of_platform_driver ehca_driver = { .match_table = ehca_device_table, .probe = ehca_probe, .remove = ehca_remove, + .driver = { + .groups = ehca_drv_attr_groups, + }, }; void ehca_poll_eqs(unsigned long data) @@ -913,7 +937,7 @@ void ehca_poll_eqs(unsigned long data) ehca_process_eq(shca, 0); } } - mod_timer(&poll_eqs_timer, jiffies + HZ); + mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ)); spin_unlock(&shca_list_lock); } @@ -944,10 +968,6 @@ int __init ehca_module_init(void) goto module_init2; } - ret = sysfs_create_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp); - if (ret) /* only complain; we can live without attributes */ - ehca_gen_err("Cannot create driver attributes ret=%d", ret); - if (ehca_poll_all_eqs != 1) { ehca_gen_err("WARNING!!!"); ehca_gen_err("It is possible to lose interrupts."); @@ -973,7 +993,6 @@ void __exit ehca_module_exit(void) if (ehca_poll_all_eqs == 1) del_timer_sync(&poll_eqs_timer); - sysfs_remove_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp); ibmebus_unregister_driver(&ehca_driver); ehca_destroy_slab_caches(); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 2e3e6547cb78..1012f15a7140 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -592,10 +592,8 @@ static struct ehca_qp *internal_create_qp( goto create_qp_exit1; } - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) - parms.sigtype = HCALL_SIGT_EVERY; - else - parms.sigtype = HCALL_SIGT_BY_WQE; + /* Always signal by WQE so we can hide circ. WQEs */ + parms.sigtype = HCALL_SIGT_BY_WQE; /* UD_AV CIRCUMVENTION */ max_send_sge = init_attr->cap.max_send_sge; @@ -618,6 +616,10 @@ static struct ehca_qp *internal_create_qp( parms.squeue.max_sge = max_send_sge; parms.rqueue.max_sge = max_recv_sge; + /* RC QPs need one more SWQE for unsolicited ack circumvention */ + if (qp_type == IB_QPT_RC) + parms.squeue.max_wr++; + if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) { if (HAS_SQ(my_qp)) ehca_determine_small_queue( @@ -650,6 +652,8 @@ static struct ehca_qp *internal_create_qp( parms.squeue.act_nr_sges = 1; parms.rqueue.act_nr_sges = 1; } + /* hide the extra WQE */ + parms.squeue.act_nr_wqes--; break; case IB_QPT_UD: case IB_QPT_GSI: @@ -729,12 +733,31 @@ static struct ehca_qp *internal_create_qp( init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; my_qp->init_attr = *init_attr; + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + &my_qp->ib_qp; + if (ehca_nr_ports < 0) { + /* alloc array to cache subsequent modify qp parms + * for autodetect mode + */ + my_qp->mod_qp_parm = + kzalloc(EHCA_MOD_QP_PARM_MAX * + sizeof(*my_qp->mod_qp_parm), + GFP_KERNEL); + if (!my_qp->mod_qp_parm) { + ehca_err(pd->device, + "Could not alloc mod_qp_parm"); + goto create_qp_exit4; + } + } + } + /* NOTE: define_apq0() not supported yet */ if (qp_type == IB_QPT_GSI) { h_ret = ehca_define_sqp(shca, my_qp, init_attr); if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - goto create_qp_exit4; + goto create_qp_exit5; } } @@ -743,7 +766,7 @@ static struct ehca_qp *internal_create_qp( if (ret) { ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%i", ret); - goto create_qp_exit4; + goto create_qp_exit5; } } @@ -769,12 +792,18 @@ static struct ehca_qp *internal_create_qp( if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit4; + goto create_qp_exit6; } } return my_qp; +create_qp_exit6: + ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); + +create_qp_exit5: + kfree(my_qp->mod_qp_parm); + create_qp_exit4: if (HAS_RQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); @@ -838,7 +867,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, /* copy back return values */ srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; - srq_init_attr->attr.max_sge = qp_init_attr.cap.max_recv_sge; + srq_init_attr->attr.max_sge = 3; /* drive SRQ into RTR state */ mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); @@ -858,7 +887,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to INIT" + ehca_err(pd->device, "Could not modify SRQ to INIT " "ehca_qp=%p qp_num=%x h_ret=%li", my_qp, my_qp->real_qp_num, hret); goto create_srq2; @@ -872,7 +901,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not enable SRQ" + ehca_err(pd->device, "Could not enable SRQ " "ehca_qp=%p qp_num=%x h_ret=%li", my_qp, my_qp->real_qp_num, hret); goto create_srq2; @@ -886,7 +915,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to RTR" + ehca_err(pd->device, "Could not modify SRQ to RTR " "ehca_qp=%p qp_num=%x h_ret=%li", my_qp, my_qp->real_qp_num, hret); goto create_srq2; @@ -992,7 +1021,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, unsigned long flags = 0; /* do query_qp to obtain current attr values */ - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); if (!mqpcb) { ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); @@ -1180,6 +1209,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); } if (attr_mask & IB_QP_PORT) { + struct ehca_sport *sport; + struct ehca_qp *aqp1; if (attr->port_num < 1 || attr->port_num > shca->num_ports) { ret = -EINVAL; ehca_err(ibqp->device, "Invalid port=%x. " @@ -1188,6 +1219,29 @@ static int internal_modify_qp(struct ib_qp *ibqp, shca->num_ports); goto modify_qp_exit2; } + sport = &shca->sport[attr->port_num - 1]; + if (!sport->ibqp_sqp[IB_QPT_GSI]) { + /* should not occur */ + ret = -EFAULT; + ehca_err(ibqp->device, "AQP1 was not created for " + "port=%x", attr->port_num); + goto modify_qp_exit2; + } + aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI], + struct ehca_qp, ib_qp); + if (ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_SMI && + aqp1->mod_qp_parm) { + /* + * firmware will reject this modify_qp() because + * port is not activated/initialized fully + */ + ret = -EFAULT; + ehca_warn(ibqp->device, "Couldn't modify qp port=%x: " + "either port is being activated (try again) " + "or cabling issue", attr->port_num); + goto modify_qp_exit2; + } mqpcb->prim_phys_port = attr->port_num; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); } @@ -1203,7 +1257,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->service_level = attr->ah_attr.sl; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - if (ehca_calc_ipd(shca, my_qp->init_attr.port_num, + if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, attr->ah_attr.static_rate, &mqpcb->max_static_rate)) { ret = -EINVAL; @@ -1244,6 +1298,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_PATH_MTU) { + /* store ld(MTU) */ + my_qp->mtu_shift = attr->path_mtu + 7; mqpcb->path_mtu = attr->path_mtu; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); } @@ -1302,7 +1358,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; mqpcb->service_level_al = attr->alt_ah_attr.sl; - if (ehca_calc_ipd(shca, my_qp->init_attr.port_num, + if (ehca_calc_ipd(shca, mqpcb->alt_phys_port, attr->alt_ah_attr.static_rate, &mqpcb->max_static_rate_al)) { ret = -EINVAL; @@ -1467,6 +1523,8 @@ modify_qp_exit1: int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, ib_pd); @@ -1479,9 +1537,100 @@ int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, return -EINVAL; } + /* The if-block below caches qp_attr to be modified for GSI and SMI + * qps during the initialization by ib_mad. When the respective port + * is activated, ie we got an event PORT_ACTIVE, we'll replay the + * cached modify calls sequence, see ehca_recover_sqs() below. + * Why that is required: + * 1) If one port is connected, older code requires that port one + * to be connected and module option nr_ports=1 to be given by + * user, which is very inconvenient for end user. + * 2) Firmware accepts modify_qp() only if respective port has become + * active. Older code had a wait loop of 30sec create_qp()/ + * define_aqp1(), which is not appropriate in practice. This + * code now removes that wait loop, see define_aqp1(), and always + * reports all ports to ib_mad resp. users. Only activated ports + * will then usable for the users. + */ + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + int port = my_qp->init_attr.port_num; + struct ehca_sport *sport = &shca->sport[port - 1]; + unsigned long flags; + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + /* cache qp_attr only during init */ + if (my_qp->mod_qp_parm) { + struct ehca_mod_qp_parm *p; + if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) { + ehca_err(&shca->ib_device, + "mod_qp_parm overflow state=%x port=%x" + " type=%x", attr->qp_state, + my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, + flags); + return -EINVAL; + } + p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx]; + p->mask = attr_mask; + p->attr = *attr; + my_qp->mod_qp_parm_idx++; + ehca_dbg(&shca->ib_device, + "Saved qp_attr for state=%x port=%x type=%x", + attr->qp_state, my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + return 0; + } + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + return internal_modify_qp(ibqp, attr, attr_mask, 0); } +void ehca_recover_sqp(struct ib_qp *sqp) +{ + struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp); + int port = my_sqp->init_attr.port_num; + struct ib_qp_attr attr; + struct ehca_mod_qp_parm *qp_parm; + int i, qp_parm_idx, ret; + unsigned long flags, wr_cnt; + + if (!my_sqp->mod_qp_parm) + return; + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num); + + qp_parm = my_sqp->mod_qp_parm; + qp_parm_idx = my_sqp->mod_qp_parm_idx; + for (i = 0; i < qp_parm_idx; i++) { + attr = qp_parm[i].attr; + ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0); + if (ret) { + ehca_err(sqp->device, "Could not modify SQP port=%x " + "qp_num=%x ret=%x", port, sqp->qp_num, ret); + goto free_qp_parm; + } + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x", + port, sqp->qp_num, attr.qp_state); + } + + /* re-trigger posted recv wrs */ + wr_cnt = my_sqp->ipz_rqueue.current_q_offset / + my_sqp->ipz_rqueue.qe_size; + if (wr_cnt) { + spin_lock_irqsave(&my_sqp->spinlock_r, flags); + hipz_update_rqa(my_sqp, wr_cnt); + spin_unlock_irqrestore(&my_sqp->spinlock_r, flags); + ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx", + port, sqp->qp_num, wr_cnt); + } + +free_qp_parm: + kfree(qp_parm); + /* this prevents subsequent calls to modify_qp() to cache qp_attr */ + my_sqp->mod_qp_parm = NULL; +} + int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) @@ -1750,7 +1899,7 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) } srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; - srq_attr->max_sge = qpcb->actual_nr_sges_in_rq_wqe; + srq_attr->max_sge = 3; srq_attr->srq_limit = EHCA_BMASK_GET( MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); @@ -1769,6 +1918,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, ib_pd); + struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1]; u32 cur_pid = current->tgid; u32 qp_num = my_qp->real_qp_num; int ret; @@ -1815,6 +1965,14 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, port_num = my_qp->init_attr.port_num; qp_type = my_qp->init_attr.qp_type; + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL; + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + /* no support for IB_QPT_SMI yet */ if (qp_type == IB_QPT_GSI) { struct ib_event event; diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index ea91360835d3..2ce8cffb8664 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -50,6 +50,9 @@ #include "hcp_if.h" #include "hipz_fns.h" +/* in RC traffic, insert an empty RDMA READ every this many packets */ +#define ACK_CIRC_THRESHOLD 2000000 + static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, struct ehca_wqe *wqe_p, struct ib_recv_wr *recv_wr) @@ -81,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, if (ehca_debug_level) { ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); - ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); } return 0; @@ -135,7 +138,8 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) static inline int ehca_write_swqe(struct ehca_qp *qp, struct ehca_wqe *wqe_p, - const struct ib_send_wr *send_wr) + const struct ib_send_wr *send_wr, + int hidden) { u32 idx; u64 dma_length; @@ -176,7 +180,9 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, wqe_p->wr_flag = 0; - if (send_wr->send_flags & IB_SEND_SIGNALED) + if ((send_wr->send_flags & IB_SEND_SIGNALED || + qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) + && !hidden) wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; if (send_wr->opcode == IB_WR_SEND_WITH_IMM || @@ -199,10 +205,14 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; wqe_p->local_ee_context_qkey = remote_qkey; - if (!send_wr->wr.ud.ah) { + if (unlikely(!send_wr->wr.ud.ah)) { ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); return -EINVAL; } + if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { + ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); + return -EINVAL; + } my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); wqe_p->u.ud_av.ud_av = my_av->av; @@ -255,6 +265,15 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, } /* eof idx */ wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; + /* unsolicited ack circumvention */ + if (send_wr->opcode == IB_WR_RDMA_READ) { + /* on RDMA read, switch on and reset counters */ + qp->message_count = qp->packet_count = 0; + qp->unsol_ack_circ = 1; + } else + /* else estimate #packets */ + qp->packet_count += (dma_length >> qp->mtu_shift) + 1; + break; default: @@ -355,13 +374,49 @@ static inline void map_ib_wc_status(u32 cqe_status, *wc_status = IB_WC_SUCCESS; } +static inline int post_one_send(struct ehca_qp *my_qp, + struct ib_send_wr *cur_send_wr, + struct ib_send_wr **bad_send_wr, + int hidden) +{ + struct ehca_wqe *wqe_p; + int ret; + u64 start_offset = my_qp->ipz_squeue.current_q_offset; + + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + if (bad_send_wr) + *bad_send_wr = cur_send_wr; + ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -ENOMEM; + } + /* write a SEND WQE into the QUEUE */ + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, hidden); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_squeue.current_q_offset = start_offset; + if (bad_send_wr) + *bad_send_wr = cur_send_wr; + ehca_err(my_qp->ib_qp.device, "Could not write WQE " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -EINVAL; + } + + return 0; +} + int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, struct ib_send_wr **bad_send_wr) { struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); struct ib_send_wr *cur_send_wr; - struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; unsigned long flags; @@ -369,37 +424,33 @@ int ehca_post_send(struct ib_qp *qp, /* LOCK the QUEUE */ spin_lock_irqsave(&my_qp->spinlock_s, flags); + /* Send an empty extra RDMA read if: + * 1) there has been an RDMA read on this connection before + * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets + * 3) we can be sure that any previous extra RDMA read has been + * processed so we don't overflow the SQ + */ + if (unlikely(my_qp->unsol_ack_circ && + my_qp->packet_count > ACK_CIRC_THRESHOLD && + my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { + /* insert an empty RDMA READ to fix up the remote QP state */ + struct ib_send_wr circ_wr; + memset(&circ_wr, 0, sizeof(circ_wr)); + circ_wr.opcode = IB_WR_RDMA_READ; + post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */ + wqe_cnt++; + ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); + my_qp->message_count = my_qp->packet_count = 0; + } + /* loop processes list of send reqs */ for (cur_send_wr = send_wr; cur_send_wr != NULL; cur_send_wr = cur_send_wr->next) { - u64 start_offset = my_qp->ipz_squeue.current_q_offset; - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - if (bad_send_wr) - *bad_send_wr = cur_send_wr; - if (wqe_cnt == 0) { - ret = -ENOMEM; - ehca_err(qp->device, "Too many posted WQEs " - "qp_num=%x", qp->qp_num); - } - goto post_send_exit0; - } - /* write a SEND WQE into the QUEUE */ - ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr); - /* - * if something failed, - * reset the free entry pointer to the start value - */ + ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0); if (unlikely(ret)) { - my_qp->ipz_squeue.current_q_offset = start_offset; - *bad_send_wr = cur_send_wr; - if (wqe_cnt == 0) { - ret = -EINVAL; - ehca_err(qp->device, "Could not write WQE " - "qp_num=%x", qp->qp_num); - } + /* if one or more WQEs were successful, don't fail */ + if (wqe_cnt) + ret = 0; goto post_send_exit0; } wqe_cnt++; @@ -410,6 +461,7 @@ int ehca_post_send(struct ib_qp *qp, post_send_exit0: iosync(); /* serialize GAL register access */ hipz_update_sqa(my_qp, wqe_cnt); + my_qp->message_count += wqe_cnt; spin_unlock_irqrestore(&my_qp->spinlock_s, flags); return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c index f0792e5fbd02..706d97ad5555 100644 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -39,15 +39,18 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <rdma/ib_mad.h> -#include <linux/module.h> -#include <linux/err.h> #include "ehca_classes.h" #include "ehca_tools.h" -#include "ehca_qes.h" #include "ehca_iverbs.h" #include "hcp_if.h" +#define IB_MAD_STATUS_REDIRECT __constant_htons(0x0002) +#define IB_MAD_STATUS_UNSUP_VERSION __constant_htons(0x0004) +#define IB_MAD_STATUS_UNSUP_METHOD __constant_htons(0x0008) + +#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001) /** * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue @@ -86,6 +89,9 @@ u64 ehca_define_sqp(struct ehca_shca *shca, port, ret); return ret; } + shca->sport[port - 1].pma_qp_nr = pma_qp_nr; + ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x", + port, pma_qp_nr); break; default: ehca_err(&shca->ib_device, "invalid qp_type=%x", @@ -93,6 +99,9 @@ u64 ehca_define_sqp(struct ehca_shca *shca, return H_PARAMETER; } + if (ehca_nr_ports < 0) /* autodetect mode */ + return H_SUCCESS; + for (counter = 0; shca->sport[port - 1].port_state != IB_PORT_ACTIVE && counter < ehca_port_act_time; @@ -109,3 +118,85 @@ u64 ehca_define_sqp(struct ehca_shca *shca, return H_SUCCESS; } + +struct ib_perf { + struct ib_mad_hdr mad_hdr; + u8 reserved[40]; + u8 data[192]; +} __attribute__ ((packed)); + + +static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + struct ib_perf *in_perf = (struct ib_perf *)in_mad; + struct ib_perf *out_perf = (struct ib_perf *)out_mad; + struct ib_class_port_info *poi = + (struct ib_class_port_info *)out_perf->data; + struct ehca_shca *shca = + container_of(ibdev, struct ehca_shca, ib_device); + struct ehca_sport *sport = &shca->sport[port_num - 1]; + + ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method); + + *out_mad = *in_mad; + + if (in_perf->mad_hdr.class_version != 1) { + ehca_warn(ibdev, "Unsupported class_version=%x", + in_perf->mad_hdr.class_version); + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION; + goto perf_reply; + } + + switch (in_perf->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + case IB_MGMT_METHOD_SET: + /* set class port info for redirection */ + out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO; + out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT; + memset(poi, 0, sizeof(*poi)); + poi->base_version = 1; + poi->class_version = 1; + poi->resp_time_value = 18; + poi->redirect_lid = sport->saved_attr.lid; + poi->redirect_qp = sport->pma_qp_nr; + poi->redirect_qkey = IB_QP1_QKEY; + poi->redirect_pkey = IB_DEFAULT_PKEY_FULL; + + ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", + sport->saved_attr.lid, sport->pma_qp_nr); + break; + + case IB_MGMT_METHOD_GET_RESP: + return IB_MAD_RESULT_FAILURE; + + default: + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD; + break; + } + +perf_reply: + out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + int ret; + + if (!port_num || port_num > ibdev->phys_port_cnt) + return IB_MAD_RESULT_FAILURE; + + /* accept only pma request */ + if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) + return IB_MAD_RESULT_SUCCESS; + + ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); + ret = ehca_process_perf(ibdev, port_num, in_mad, out_mad); + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index c16a21374bb5..7029aa653751 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -120,26 +120,21 @@ static long ehca_plpar_hcall_norets(unsigned long opcode, unsigned long arg7) { long ret; - int i, sleep_msecs, do_lock; - unsigned long flags; + int i, sleep_msecs; + unsigned long flags = 0; ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - /* lock H_FREE_RESOURCE(MR) against itself and H_ALLOC_RESOURCE(MR) */ - if ((opcode == H_FREE_RESOURCE) && (arg7 == 5)) { - arg7 = 0; /* better not upset firmware */ - do_lock = 1; - } - for (i = 0; i < 5; i++) { - if (do_lock) + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) spin_lock_irqsave(&hcall_lock, flags); ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - if (do_lock) + if (ehca_lock_hcalls) spin_unlock_irqrestore(&hcall_lock, flags); if (H_IS_LONG_BUSY(ret)) { @@ -174,24 +169,22 @@ static long ehca_plpar_hcall9(unsigned long opcode, unsigned long arg9) { long ret; - int i, sleep_msecs, do_lock; + int i, sleep_msecs; unsigned long flags = 0; ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); - /* lock H_ALLOC_RESOURCE(MR) against itself and H_FREE_RESOURCE(MR) */ - do_lock = ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)); - for (i = 0; i < 5; i++) { - if (do_lock) + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) spin_lock_irqsave(&hcall_lock, flags); ret = plpar_hcall9(opcode, outs, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); - if (do_lock) + if (ehca_lock_hcalls) spin_unlock_irqrestore(&hcall_lock, flags); if (H_IS_LONG_BUSY(ret)) { @@ -821,7 +814,7 @@ u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, return ehca_plpar_hcall_norets(H_FREE_RESOURCE, adapter_handle.handle, /* r4 */ mr->ipz_mr_handle.handle, /* r5 */ - 0, 0, 0, 0, 5); + 0, 0, 0, 0, 0); } u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index 485b8400359e..bf996c7acc42 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -378,6 +378,7 @@ struct hipz_query_hca { #define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) #define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) #define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) +#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19) /* query port response block */ struct hipz_query_port { diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 851df8a75e79..414621095540 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -82,6 +82,16 @@ #define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */ /* + * These 3 values (SDR and DDR may be ORed for auto-speed + * negotiation) are used for the 3rd argument to path_f_set_ib_cfg + * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs. They + * are also the the possible values for ipath_link_speed_enabled and active + * The values were chosen to match values used within the IB spec. + */ +#define IPATH_IB_SDR 1 +#define IPATH_IB_DDR 2 + +/* * stats maintained by the driver. For now, at least, this is global * to all minor devices. */ @@ -433,8 +443,9 @@ struct ipath_user_info { #define IPATH_CMD_UNUSED_2 26 #define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ #define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */ +#define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */ -#define IPATH_CMD_MAX 28 +#define IPATH_CMD_MAX 29 /* * Poll types @@ -477,6 +488,8 @@ struct ipath_cmd { __u64 port_info; /* enable/disable receipt of packets */ __u32 recv_ctrl; + /* enable/disable armlaunch errors (non-zero to enable) */ + __u32 armlaunch_ctrl; /* partition key to set */ __u16 part_key; /* user address of __u32 bitmask of active slaves */ @@ -579,7 +592,7 @@ struct ipath_flash { struct infinipath_counters { __u64 LBIntCnt; __u64 LBFlowStallCnt; - __u64 Reserved1; + __u64 TxSDmaDescCnt; /* was Reserved1 */ __u64 TxUnsupVLErrCnt; __u64 TxDataPktCnt; __u64 TxFlowPktCnt; @@ -615,12 +628,26 @@ struct infinipath_counters { __u64 RxP6HdrEgrOvflCnt; __u64 RxP7HdrEgrOvflCnt; __u64 RxP8HdrEgrOvflCnt; - __u64 Reserved6; - __u64 Reserved7; + __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */ + __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */ + __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */ __u64 IBStatusChangeCnt; __u64 IBLinkErrRecoveryCnt; __u64 IBLinkDownedCnt; __u64 IBSymbolErrCnt; + /* The following are new for IBA7220 */ + __u64 RxVL15DroppedPktCnt; + __u64 RxOtherLocalPhyErrCnt; + __u64 PcieRetryBufDiagQwordCnt; + __u64 ExcessBufferOvflCnt; + __u64 LocalLinkIntegrityErrCnt; + __u64 RxVlErrCnt; + __u64 RxDlidFltrCnt; }; /* diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 08d8ae148cd0..a03bd28d9b48 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -395,12 +395,9 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) goto bail; } - /* - * Return the address of the WC as the offset to mmap. - * See ipath_mmap() for details. - */ + /* Check that we can write the offset to mmap. */ if (udata && udata->outlen >= sizeof(__u64)) { - __u64 offset = (__u64) wc; + __u64 offset = 0; ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (ret) @@ -424,7 +421,7 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) else n = head - tail; if (unlikely((u32)cqe < n)) { - ret = -EOVERFLOW; + ret = -EINVAL; goto bail_unlock; } for (n = 0; tail != head; n++) { @@ -450,6 +447,18 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) struct ipath_mmap_info *ip = cq->ip; ipath_update_mmap_info(dev, ip, sz, wc); + + /* + * Return the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + spin_lock_irq(&dev->pending_lock); if (list_empty(&ip->pending_mmaps)) list_add(&ip->pending_mmaps, &dev->pending_mmaps); diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h index 19c56e6491eb..d6f69532d83f 100644 --- a/drivers/infiniband/hw/ipath/ipath_debug.h +++ b/drivers/infiniband/hw/ipath/ipath_debug.h @@ -55,7 +55,7 @@ #define __IPATH_PKTDBG 0x80 /* print packet data */ /* print process startup (init)/exit messages */ #define __IPATH_PROCDBG 0x100 -/* print mmap/nopage stuff, not using VDBG any more */ +/* print mmap/fault stuff, not using VDBG any more */ #define __IPATH_MMDBG 0x200 #define __IPATH_ERRPKTDBG 0x400 #define __IPATH_USER_SEND 0x1000 /* use user mode send */ @@ -81,7 +81,7 @@ #define __IPATH_VERBDBG 0x0 /* very verbose debug */ #define __IPATH_PKTDBG 0x0 /* print packet data */ #define __IPATH_PROCDBG 0x0 /* process startup (init)/exit messages */ -/* print mmap/nopage stuff, not using VDBG any more */ +/* print mmap/fault stuff, not using VDBG any more */ #define __IPATH_MMDBG 0x0 #define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */ #define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */ diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 1f152ded1e3c..d5ff6ca2db30 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -121,6 +121,9 @@ static struct pci_driver ipath_driver = { .probe = ipath_init_one, .remove = __devexit_p(ipath_remove_one), .id_table = ipath_pci_tbl, + .driver = { + .groups = ipath_driver_attr_groups, + }, }; static void ipath_check_status(struct work_struct *work) @@ -331,6 +334,8 @@ static void ipath_verify_pioperf(struct ipath_devdata *dd) udelay(1); } + ipath_disable_armlaunch(dd); + writeq(0, piobuf); /* length 0, no dwords actually sent */ ipath_flush_wc(); @@ -362,6 +367,7 @@ static void ipath_verify_pioperf(struct ipath_devdata *dd) done: /* disarm piobuf, so it's available again */ ipath_disarm_piobufs(dd, pbnum, 1); + ipath_enable_armlaunch(dd); } static int __devinit ipath_init_one(struct pci_dev *pdev, @@ -800,31 +806,37 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, unsigned cnt) { unsigned i, last = first + cnt; - u64 sendctrl, sendorig; + unsigned long flags; ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); - sendorig = dd->ipath_sendctrl; for (i = first; i < last; i++) { - sendctrl = sendorig | INFINIPATH_S_DISARM | - (i << INFINIPATH_S_DISARMPIOBUF_SHIFT); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + /* + * The disarm-related bits are write-only, so it + * is ok to OR them in with our copy of sendctrl + * while we hold the lock. + */ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - sendctrl); + dd->ipath_sendctrl | INFINIPATH_S_DISARM | + (i << INFINIPATH_S_DISARMPIOBUF_SHIFT)); + /* can't disarm bufs back-to-back per iba7220 spec */ + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); } /* - * Write it again with current value, in case ipath_sendctrl changed - * while we were looping; no critical bits that would require - * locking. - * - * disable PIOAVAILUPD, then re-enable, reading scratch in + * Disable PIOAVAILUPD, then re-enable, reading scratch in * between. This seems to avoid a chip timing race that causes - * pioavail updates to memory to stop. + * pioavail updates to memory to stop. We xor as we don't + * know the state of the bit when we're called. */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - sendorig & ~INFINIPATH_S_PIOBUFAVAILUPD); - sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + dd->ipath_sendctrl ^ INFINIPATH_S_PIOBUFAVAILUPD); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); } /** @@ -1000,12 +1012,10 @@ static void get_rhf_errstring(u32 err, char *msg, size_t len) * ipath_get_egrbuf - get an eager buffer * @dd: the infinipath device * @bufnum: the eager buffer to get - * @err: unused * * must only be called if ipath_pd[port] is known to be allocated */ -static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum, - int err) +static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum) { return dd->ipath_port0_skbinfo ? (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL; @@ -1097,13 +1107,14 @@ static void ipath_rcv_hdrerr(struct ipath_devdata *dd, /* * ipath_kreceive - receive a packet - * @dd: the infinipath device + * @pd: the infinipath port * * called from interrupt handler for errors or receive interrupt */ -void ipath_kreceive(struct ipath_devdata *dd) +void ipath_kreceive(struct ipath_portdata *pd) { u64 *rc; + struct ipath_devdata *dd = pd->port_dd; void *ebuf; const u32 rsize = dd->ipath_rcvhdrentsize; /* words */ const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */ @@ -1118,8 +1129,8 @@ void ipath_kreceive(struct ipath_devdata *dd) goto bail; } - l = dd->ipath_port0head; - hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr); + l = pd->port_head; + hdrqtail = ipath_get_rcvhdrtail(pd); if (l == hdrqtail) goto bail; @@ -1128,7 +1139,7 @@ reloop: u32 qp; u8 *bthbytes; - rc = (u64 *) (dd->ipath_pd[0]->port_rcvhdrq + (l << 2)); + rc = (u64 *) (pd->port_rcvhdrq + (l << 2)); hdr = (struct ipath_message_header *)&rc[1]; /* * could make a network order version of IPATH_KD_QP, and @@ -1153,7 +1164,7 @@ reloop: etail = ipath_hdrget_index((__le32 *) rc); if (tlen > sizeof(*hdr) || etype == RCVHQ_RCV_TYPE_NON_KD) - ebuf = ipath_get_egrbuf(dd, etail, 0); + ebuf = ipath_get_egrbuf(dd, etail); } /* @@ -1188,7 +1199,7 @@ reloop: be32_to_cpu(hdr->bth[0]) & 0xff); else { /* - * error packet, type of error unknown. + * error packet, type of error unknown. * Probably type 3, but we don't know, so don't * even try to print the opcode, etc. */ @@ -1238,7 +1249,7 @@ reloop: * earlier packets, we "almost" guarantee we have covered * that case. */ - u32 hqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr); + u32 hqtail = ipath_get_rcvhdrtail(pd); if (hqtail != hdrqtail) { hdrqtail = hqtail; reloop = 1; /* loop 1 extra time at most */ @@ -1248,7 +1259,7 @@ reloop: pkttot += i; - dd->ipath_port0head = l; + pd->port_head = l; if (pkttot > ipath_stats.sps_maxpkts_call) ipath_stats.sps_maxpkts_call = pkttot; @@ -1332,14 +1343,9 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd) /* * Chip Errata: bug 6641; even and odd qwords>3 are swapped */ - if (i > 3) { - if (i & 1) - piov = le64_to_cpu( - dd->ipath_pioavailregs_dma[i - 1]); - else - piov = le64_to_cpu( - dd->ipath_pioavailregs_dma[i + 1]); - } else + if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) + piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]); + else piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]); pchg = _IPATH_ALL_CHECKBITS & ~(dd->ipath_pioavailshadow[i] ^ piov); @@ -1598,7 +1604,8 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, /* clear for security and sanity on each use */ memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size); - memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); + if (pd->port_rcvhdrtail_kvaddr) + memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); /* * tell chip each time we init it, even if we are re-using previous @@ -1614,77 +1621,6 @@ bail: return ret; } -int ipath_waitfor_complete(struct ipath_devdata *dd, ipath_kreg reg_id, - u64 bits_to_wait_for, u64 * valp) -{ - unsigned long timeout; - u64 lastval, val; - int ret; - - lastval = ipath_read_kreg64(dd, reg_id); - /* wait a ridiculously long time */ - timeout = jiffies + msecs_to_jiffies(5); - do { - val = ipath_read_kreg64(dd, reg_id); - /* set so they have something, even on failures. */ - *valp = val; - if ((val & bits_to_wait_for) == bits_to_wait_for) { - ret = 0; - break; - } - if (val != lastval) - ipath_cdbg(VERBOSE, "Changed from %llx to %llx, " - "waiting for %llx bits\n", - (unsigned long long) lastval, - (unsigned long long) val, - (unsigned long long) bits_to_wait_for); - cond_resched(); - if (time_after(jiffies, timeout)) { - ipath_dbg("Didn't get bits %llx in register 0x%x, " - "got %llx\n", - (unsigned long long) bits_to_wait_for, - reg_id, (unsigned long long) *valp); - ret = -ENODEV; - break; - } - } while (1); - - return ret; -} - -/** - * ipath_waitfor_mdio_cmdready - wait for last command to complete - * @dd: the infinipath device - * - * Like ipath_waitfor_complete(), but we wait for the CMDVALID bit to go - * away indicating the last command has completed. It doesn't return data - */ -int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd) -{ - unsigned long timeout; - u64 val; - int ret; - - /* wait a ridiculously long time */ - timeout = jiffies + msecs_to_jiffies(5); - do { - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_mdio); - if (!(val & IPATH_MDIO_CMDVALID)) { - ret = 0; - break; - } - cond_resched(); - if (time_after(jiffies, timeout)) { - ipath_dbg("CMDVALID stuck in mdio reg? (%llx)\n", - (unsigned long long) val); - ret = -ENODEV; - break; - } - } while (1); - - return ret; -} - /* * Flush all sends that might be in the ready to send state, as well as any @@ -2053,6 +1989,8 @@ void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) */ void ipath_shutdown_device(struct ipath_devdata *dd) { + unsigned long flags; + ipath_dbg("Shutting down the device\n"); dd->ipath_flags |= IPATH_LINKUNK; @@ -2073,9 +2011,13 @@ void ipath_shutdown_device(struct ipath_devdata *dd) * gracefully stop all sends allowing any in progress to trickle out * first. */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0ULL); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl = 0; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); /* flush it */ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + /* * enough for anything that's going to trickle out to have actually * done so. @@ -2217,25 +2159,15 @@ static int __init infinipath_init(void) goto bail_unit; } - ret = ipath_driver_create_group(&ipath_driver.driver); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME ": Unable to create driver " - "sysfs entries: error %d\n", -ret); - goto bail_pci; - } - ret = ipath_init_ipathfs(); if (ret < 0) { printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " "ipathfs: error %d\n", -ret); - goto bail_group; + goto bail_pci; } goto bail; -bail_group: - ipath_driver_remove_group(&ipath_driver.driver); - bail_pci: pci_unregister_driver(&ipath_driver); @@ -2250,8 +2182,6 @@ static void __exit infinipath_cleanup(void) { ipath_exit_ipathfs(); - ipath_driver_remove_group(&ipath_driver.driver); - ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); pci_unregister_driver(&ipath_driver); @@ -2344,5 +2274,34 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) } return 0; } + +/* + * Disable and enable the armlaunch error. Used for PIO bandwidth testing on + * the 7220, which is count-based, rather than trigger-based. Safe for the + * driver check, since it's at init. Not completely safe when used for + * user-mode checking, since some error checking can be lost, but not + * particularly risky, and only has problematic side-effects in the face of + * very buggy user code. There is no reference counting, but that's also + * fine, given the intended use. + */ +void ipath_enable_armlaunch(struct ipath_devdata *dd) +{ + dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH; + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, + INFINIPATH_E_SPIOARMLAUNCH); + dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH; + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + dd->ipath_errormask); +} + +void ipath_disable_armlaunch(struct ipath_devdata *dd) +{ + /* so don't re-enable if already set */ + dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH; + dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH; + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + dd->ipath_errormask); +} + module_init(infinipath_init); module_exit(infinipath_cleanup); diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index e7c25dbbcdc9..e28a42f53769 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -510,10 +510,10 @@ int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, { int ret; - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (!ret) { ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); } return ret; @@ -524,10 +524,10 @@ int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, { int ret; - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (!ret) { ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); } return ret; @@ -574,7 +574,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) struct ipath_devdata *dd0 = ipath_lookup(0); if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) { - u8 *bguid, oguid; + u8 oguid; dd->ipath_guid = dd0->ipath_guid; bguid = (u8 *) & dd->ipath_guid; @@ -616,9 +616,9 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) goto bail; } - down(&dd->ipath_eep_sem); + mutex_lock(&dd->ipath_eep_lock); eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); if (eep_stat) { ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); @@ -674,7 +674,6 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) * elsewhere for backward-compatibility. */ char *snp = dd->ipath_serial; - int len; memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix); snp[sizeof ifp->if_sprefix] = '\0'; len = strlen(snp); @@ -764,14 +763,14 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) /* Grab semaphore and read current EEPROM. If we get an * error, let go, but if not, keep it until we finish write. */ - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (ret) { ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n"); goto free_bail; } ret = ipath_eeprom_internal_read(dd, 0, buf, len); if (ret) { - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); ipath_dev_err(dd, "Unable read EEPROM for logging\n"); goto free_bail; } @@ -779,7 +778,7 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) csum = flash_csum(ifp, 0); if (csum != ifp->if_csum) { - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", csum, ifp->if_csum); ret = 1; @@ -849,7 +848,7 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) csum = flash_csum(ifp, 1); ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); } - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); if (ret) ipath_dev_err(dd, "Failed updating EEPROM\n"); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 5de3243a47c3..7e025c8e01b6 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -169,7 +169,7 @@ static int ipath_get_base_info(struct file *fp, kinfo->spi_piocnt = dd->ipath_pbufsport; kinfo->spi_piobufbase = (u64) pd->port_piobufs; kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + - dd->ipath_palign * pd->port_port; + dd->ipath_ureg_align * pd->port_port; } else if (master) { kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + (dd->ipath_pbufsport % subport_cnt); @@ -186,7 +186,7 @@ static int ipath_get_base_info(struct file *fp, } if (shared) { kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + - dd->ipath_palign * pd->port_port; + dd->ipath_ureg_align * pd->port_port; kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs; kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base; kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr; @@ -742,11 +742,12 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, * updated and correct itself, even in the face of software * bugs. */ - *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0; - set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, + if (pd->port_rcvhdrtail_kvaddr) + ipath_clear_rcvhdrtail(pd); + set_bit(dd->ipath_r_portenable_shift + pd->port_port, &dd->ipath_rcvctrl); } else - clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, + clear_bit(dd->ipath_r_portenable_shift + pd->port_port, &dd->ipath_rcvctrl); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); @@ -881,7 +882,7 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) egrcnt = dd->ipath_rcvegrcnt; /* TID number offset for this port */ - egroff = pd->port_port * egrcnt; + egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt; egrsize = dd->ipath_rcvegrbufsize; ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); @@ -1049,11 +1050,6 @@ static int mmap_piobufs(struct vm_area_struct *vma, phys = dd->ipath_physaddr + piobufs; - /* - * Don't mark this as non-cached, or we don't get the - * write combining behavior we want on the PIO buffers! - */ - #if defined(__powerpc__) /* There isn't a generic way to specify writethrough mappings */ pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; @@ -1120,33 +1116,24 @@ bail: } /* - * ipath_file_vma_nopage - handle a VMA page fault. + * ipath_file_vma_fault - handle a VMA page fault. */ -static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static int ipath_file_vma_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) { - unsigned long offset = address - vma->vm_start; - struct page *page = NOPAGE_SIGBUS; - void *pageptr; + struct page *page; - /* - * Convert the vmalloc address into a struct page. - */ - pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT)); - page = vmalloc_to_page(pageptr); + page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT)); if (!page) - goto out; - - /* Increment the reference count. */ + return VM_FAULT_SIGBUS; get_page(page); - if (type) - *type = VM_FAULT_MINOR; -out: - return page; + vmf->page = page; + + return 0; } static struct vm_operations_struct ipath_file_vm_ops = { - .nopage = ipath_file_vma_nopage, + .fault = ipath_file_vma_fault, }; static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, @@ -1284,7 +1271,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) goto bail; } - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; if (!pd->port_subport_cnt) { /* port is not shared */ piocnt = dd->ipath_pbufsport; @@ -1400,7 +1387,10 @@ static unsigned int ipath_poll_next(struct ipath_portdata *pd, pollflag = ipath_poll_hdrqfull(pd); head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); - tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr; + if (pd->port_rcvhdrtail_kvaddr) + tail = ipath_get_rcvhdrtail(pd); + else + tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); if (head != tail) pollflag |= POLLIN | POLLRDNORM; @@ -1410,7 +1400,7 @@ static unsigned int ipath_poll_next(struct ipath_portdata *pd, /* flush waiting flag so we don't miss an event */ wmb(); - set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT, + set_bit(pd->port_port + dd->ipath_r_intravail_shift, &dd->ipath_rcvctrl); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, @@ -1790,6 +1780,7 @@ static int find_shared_port(struct file *fp, } port_fp(fp) = pd; subport_fp(fp) = pd->port_cnt++; + pd->port_subpid[subport_fp(fp)] = current->pid; tidcursor_fp(fp) = 0; pd->active_slaves |= 1 << subport_fp(fp); ipath_cdbg(PROC, @@ -1920,8 +1911,7 @@ static int ipath_do_user_init(struct file *fp, */ head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); - dd->ipath_lastegrheads[pd->port_port] = -1; - dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; + pd->port_lastrcvhdrqtail = -1; ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", pd->port_port, head32); pd->port_tidcursor = 0; /* start at beginning after open */ @@ -1941,11 +1931,13 @@ static int ipath_do_user_init(struct file *fp, * We explictly set the in-memory copy to 0 beforehand, so we don't * have to wait to be sure the DMA update has happened. */ - *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL; - set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, + if (pd->port_rcvhdrtail_kvaddr) + ipath_clear_rcvhdrtail(pd); + set_bit(dd->ipath_r_portenable_shift + pd->port_port, &dd->ipath_rcvctrl); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); + dd->ipath_rcvctrl & + ~(1ULL << dd->ipath_r_tailupd_shift)); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); /* Notify any waiting slaves */ @@ -2022,6 +2014,7 @@ static int ipath_close(struct inode *in, struct file *fp) * the slave(s) don't wait for receive data forever. */ pd->active_slaves &= ~(1 << fd->subport); + pd->port_subpid[fd->subport] = 0; mutex_unlock(&ipath_mutex); goto bail; } @@ -2054,9 +2047,9 @@ static int ipath_close(struct inode *in, struct file *fp) if (dd->ipath_kregbase) { int i; /* atomically clear receive enable port and intr avail. */ - clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port, + clear_bit(dd->ipath_r_portenable_shift + port, &dd->ipath_rcvctrl); - clear_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT, + clear_bit(pd->port_port + dd->ipath_r_intravail_shift, &dd->ipath_rcvctrl); ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); @@ -2149,11 +2142,15 @@ static int ipath_get_slave_info(struct ipath_portdata *pd, static int ipath_force_pio_avail_update(struct ipath_devdata *dd) { - u64 reg = dd->ipath_sendctrl; + unsigned long flags; - clear_bit(IPATH_S_PIOBUFAVAILUPD, ®); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); return 0; } @@ -2227,6 +2224,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, dest = &cmd.cmd.poll_type; src = &ucmd->cmd.poll_type; break; + case IPATH_CMD_ARMLAUNCH_CTRL: + copy = sizeof(cmd.cmd.armlaunch_ctrl); + dest = &cmd.cmd.armlaunch_ctrl; + src = &ucmd->cmd.armlaunch_ctrl; + break; default: ret = -EINVAL; goto bail; @@ -2302,6 +2304,12 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, case IPATH_CMD_POLL_TYPE: pd->poll_type = cmd.cmd.poll_type; break; + case IPATH_CMD_ARMLAUNCH_CTRL: + if (cmd.cmd.armlaunch_ctrl) + ipath_enable_armlaunch(pd->port_dd); + else + ipath_disable_armlaunch(pd->port_dd); + break; } if (ret >= 0) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 262c25db05cd..23faba9d21eb 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -108,21 +108,16 @@ static const struct file_operations atomic_stats_ops = { .read = atomic_stats_read, }; -#define NUM_COUNTERS sizeof(struct infinipath_counters) / sizeof(u64) - static ssize_t atomic_counters_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - u64 counters[NUM_COUNTERS]; - u16 i; + struct infinipath_counters counters; struct ipath_devdata *dd; dd = file->f_path.dentry->d_inode->i_private; + dd->ipath_f_read_counters(dd, &counters); - for (i = 0; i < NUM_COUNTERS; i++) - counters[i] = ipath_snap_cntr(dd, i); - - return simple_read_from_buffer(buf, count, ppos, counters, + return simple_read_from_buffer(buf, count, ppos, &counters, sizeof counters); } @@ -243,8 +238,7 @@ static int create_device_files(struct super_block *sb, snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, - (struct file_operations *) &simple_dir_operations, - dd); + &simple_dir_operations, dd); if (ret) { printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); goto bail; diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index ddbebe4bdb27..9e2ced3cdc5e 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -148,10 +148,57 @@ struct _infinipath_do_not_use_kernel_regs { unsigned long long ReservedSW2[4]; }; -#define IPATH_KREG_OFFSET(field) (offsetof(struct \ - _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) +struct _infinipath_do_not_use_counters { + __u64 LBIntCnt; + __u64 LBFlowStallCnt; + __u64 Reserved1; + __u64 TxUnsupVLErrCnt; + __u64 TxDataPktCnt; + __u64 TxFlowPktCnt; + __u64 TxDwordCnt; + __u64 TxLenErrCnt; + __u64 TxMaxMinLenErrCnt; + __u64 TxUnderrunCnt; + __u64 TxFlowStallCnt; + __u64 TxDroppedPktCnt; + __u64 RxDroppedPktCnt; + __u64 RxDataPktCnt; + __u64 RxFlowPktCnt; + __u64 RxDwordCnt; + __u64 RxLenErrCnt; + __u64 RxMaxMinLenErrCnt; + __u64 RxICRCErrCnt; + __u64 RxVCRCErrCnt; + __u64 RxFlowCtrlErrCnt; + __u64 RxBadFormatCnt; + __u64 RxLinkProblemCnt; + __u64 RxEBPCnt; + __u64 RxLPCRCErrCnt; + __u64 RxBufOvflCnt; + __u64 RxTIDFullErrCnt; + __u64 RxTIDValidErrCnt; + __u64 RxPKeyMismatchCnt; + __u64 RxP0HdrEgrOvflCnt; + __u64 RxP1HdrEgrOvflCnt; + __u64 RxP2HdrEgrOvflCnt; + __u64 RxP3HdrEgrOvflCnt; + __u64 RxP4HdrEgrOvflCnt; + __u64 RxP5HdrEgrOvflCnt; + __u64 RxP6HdrEgrOvflCnt; + __u64 RxP7HdrEgrOvflCnt; + __u64 RxP8HdrEgrOvflCnt; + __u64 Reserved6; + __u64 Reserved7; + __u64 IBStatusChangeCnt; + __u64 IBLinkErrRecoveryCnt; + __u64 IBLinkDownedCnt; + __u64 IBSymbolErrCnt; +}; + +#define IPATH_KREG_OFFSET(field) (offsetof( \ + struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) #define IPATH_CREG_OFFSET(field) (offsetof( \ - struct infinipath_counters, field) / sizeof(u64)) + struct _infinipath_do_not_use_counters, field) / sizeof(u64)) static const struct ipath_kregs ipath_ht_kregs = { .kr_control = IPATH_KREG_OFFSET(Control), @@ -282,6 +329,9 @@ static const struct ipath_cregs ipath_ht_cregs = { #define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL #define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL +#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf +#define IBA6110_IBCS_LINKSTATE_SHIFT 4 + /* kr_extstatus bits */ #define INFINIPATH_EXTS_FREQSEL 0x2 #define INFINIPATH_EXTS_SERDESSEL 0x4 @@ -296,6 +346,12 @@ static const struct ipath_cregs ipath_ht_cregs = { #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL #define INFINIPATH_RT_BUFSIZE_SHIFT 48 +#define INFINIPATH_R_INTRAVAIL_SHIFT 16 +#define INFINIPATH_R_TAILUPD_SHIFT 31 + +/* kr_xgxsconfig bits */ +#define INFINIPATH_XGXS_RESET 0x7ULL + /* * masks and bits that are different in different chips, or present only * in one @@ -652,7 +708,6 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, "with ID %u\n", boardrev); snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u", boardrev); - ret = 1; break; } if (n) @@ -686,6 +741,13 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, dd->ipath_htspeed); ret = 0; + /* + * set here, not in ipath_init_*_funcs because we have to do + * it after we can read chip registers. + */ + dd->ipath_ureg_align = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign); + bail: return ret; } @@ -969,7 +1031,8 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd, do { u8 cap_type; - /* the HT capability type byte is 3 bytes after the + /* + * The HT capability type byte is 3 bytes after the * capability byte. */ if (pci_read_config_byte(pdev, pos + 3, &cap_type)) { @@ -982,6 +1045,8 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd, } while ((pos = pci_find_next_capability(pdev, pos, PCI_CAP_ID_HT))); + dd->ipath_flags |= IPATH_SWAP_PIOBUFS; + bail: return ret; } @@ -1074,11 +1139,55 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd, static void ipath_init_ht_variables(struct ipath_devdata *dd) { + /* + * setup the register offsets, since they are different for each + * chip + */ + dd->ipath_kregs = &ipath_ht_kregs; + dd->ipath_cregs = &ipath_ht_cregs; + dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; dd->ipath_gpio_sda = IPATH_GPIO_SDA; dd->ipath_gpio_scl = IPATH_GPIO_SCL; + /* + * Fill in data for field-values that change in newer chips. + * We dynamically specify only the mask for LINKTRAININGSTATE + * and only the shift for LINKSTATE, as they are the only ones + * that change. Also precalculate the 3 link states of interest + * and the combined mask. + */ + dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT; + dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK; + dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK << + dd->ibcs_ls_shift) | dd->ibcs_lts_mask; + dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift); + dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift); + dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift); + + /* + * Fill in data for ibcc field-values that change in newer chips. + * We dynamically specify only the mask for LINKINITCMD + * and only the shift for LINKCMD and MAXPKTLEN, as they are + * the only ones that change. + */ + dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK; + dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT; + dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT; + + /* Fill in shifts for RcvCtrl. */ + dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT; + dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT; + dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT; + dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */ + dd->ipath_i_bitsextant = (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | (INFINIPATH_I_RCVAVAIL_MASK << @@ -1135,6 +1244,8 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd) dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT; + dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT; /* * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. @@ -1148,9 +1259,17 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd) INFINIPATH_HWE_RXEMEMPARITYERR_MASK << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; - dd->ipath_eep_st_masks[2].errs_to_log = - INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET; + dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET; + dd->delay_mult = 2; /* SDR, 4X, can't change */ + + dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; + dd->ipath_link_speed_supported = IPATH_IB_SDR; + dd->ipath_link_width_enabled = IB_WIDTH_4X; + dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported; + /* these can't change for this chip, so set once */ + dd->ipath_link_width_active = dd->ipath_link_width_enabled; + dd->ipath_link_speed_active = dd->ipath_link_speed_enabled; } /** @@ -1205,14 +1324,16 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd) val &= ~INFINIPATH_HWE_HTCMISCERR4; /* - * PLL ignored because MDIO interface has a logic problem - * for reads, on Comstock and Ponderosa. BRINGUP + * PLL ignored because unused MDIO interface has a logic problem */ if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9) val &= ~INFINIPATH_HWE_SERDESPLLFAILED; dd->ipath_hwerrmask = val; } + + + /** * ipath_ht_bringup_serdes - bring up the serdes * @dd: the infinipath device @@ -1284,16 +1405,6 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd) } val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) & - INFINIPATH_XGXS_MDIOADDR_MASK) != 3) { - val &= ~(INFINIPATH_XGXS_MDIOADDR_MASK << - INFINIPATH_XGXS_MDIOADDR_SHIFT); - /* - * we use address 3 - */ - val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT; - change = 1; - } if (val & INFINIPATH_XGXS_RESET) { /* normally true after boot */ val &= ~INFINIPATH_XGXS_RESET; @@ -1329,21 +1440,6 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd) (unsigned long long) ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); - if (!ipath_waitfor_mdio_cmdready(dd)) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_mdio, - ipath_mdio_req(IPATH_MDIO_CMD_READ, 31, - IPATH_MDIO_CTRL_XGXS_REG_8, - 0)); - if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio, - IPATH_MDIO_DATAVALID, &val)) - ipath_dbg("Never got MDIO data for XGXS status " - "read\n"); - else - ipath_cdbg(VERBOSE, "MDIO Read reg8, " - "'bank' 31 %x\n", (u32) val); - } else - ipath_dbg("Never got MDIO cmdready for XGXS status read\n"); - return ret; /* for now, say we always succeeded */ } @@ -1396,6 +1492,7 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, pa |= lenvalid | INFINIPATH_RT_VALID; } } + writeq(pa, tidptr); } @@ -1526,8 +1623,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) } ipath_get_eeprom_info(dd); - if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && - dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { + if (dd->ipath_boardrev == 5) { /* * Later production QHT7040 has same changes as QHT7140, so * can use GPIO interrupts. They have serial #'s starting @@ -1602,6 +1698,210 @@ static void ipath_ht_free_irq(struct ipath_devdata *dd) dd->ipath_intconfig = 0; } +static struct ipath_message_header * +ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) +{ + return (struct ipath_message_header *) + &rhf_addr[sizeof(u64) / sizeof(u32)]; +} + +static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports) +{ + dd->ipath_portcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); + dd->ipath_p0_rcvegrcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); +} + +static void ipath_ht_read_counters(struct ipath_devdata *dd, + struct infinipath_counters *cntrs) +{ + cntrs->LBIntCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt)); + cntrs->LBFlowStallCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt)); + cntrs->TxSDmaDescCnt = 0; + cntrs->TxUnsupVLErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt)); + cntrs->TxDataPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt)); + cntrs->TxFlowPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt)); + cntrs->TxDwordCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt)); + cntrs->TxLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt)); + cntrs->TxMaxMinLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt)); + cntrs->TxUnderrunCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt)); + cntrs->TxFlowStallCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt)); + cntrs->TxDroppedPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt)); + cntrs->RxDroppedPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt)); + cntrs->RxDataPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt)); + cntrs->RxFlowPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt)); + cntrs->RxDwordCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt)); + cntrs->RxLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt)); + cntrs->RxMaxMinLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt)); + cntrs->RxICRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt)); + cntrs->RxVCRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt)); + cntrs->RxFlowCtrlErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt)); + cntrs->RxBadFormatCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt)); + cntrs->RxLinkProblemCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt)); + cntrs->RxEBPCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt)); + cntrs->RxLPCRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt)); + cntrs->RxBufOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt)); + cntrs->RxTIDFullErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt)); + cntrs->RxTIDValidErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt)); + cntrs->RxPKeyMismatchCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt)); + cntrs->RxP0HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt)); + cntrs->RxP1HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt)); + cntrs->RxP2HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt)); + cntrs->RxP3HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt)); + cntrs->RxP4HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt)); + cntrs->RxP5HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt)); + cntrs->RxP6HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt)); + cntrs->RxP7HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt)); + cntrs->RxP8HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt)); + cntrs->RxP9HdrEgrOvflCnt = 0; + cntrs->RxP10HdrEgrOvflCnt = 0; + cntrs->RxP11HdrEgrOvflCnt = 0; + cntrs->RxP12HdrEgrOvflCnt = 0; + cntrs->RxP13HdrEgrOvflCnt = 0; + cntrs->RxP14HdrEgrOvflCnt = 0; + cntrs->RxP15HdrEgrOvflCnt = 0; + cntrs->RxP16HdrEgrOvflCnt = 0; + cntrs->IBStatusChangeCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt)); + cntrs->IBLinkErrRecoveryCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt)); + cntrs->IBLinkDownedCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt)); + cntrs->IBSymbolErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt)); + cntrs->RxVL15DroppedPktCnt = 0; + cntrs->RxOtherLocalPhyErrCnt = 0; + cntrs->PcieRetryBufDiagQwordCnt = 0; + cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs; + cntrs->LocalLinkIntegrityErrCnt = + (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? + dd->ipath_lli_errs : dd->ipath_lli_errors; + cntrs->RxVlErrCnt = 0; + cntrs->RxDlidFltrCnt = 0; +} + + +/* no interrupt fallback for these chips */ +static int ipath_ht_nointr_fallback(struct ipath_devdata *dd) +{ + return 0; +} + + +/* + * reset the XGXS (between serdes and IBC). Slightly less intrusive + * than resetting the IBC or external link state, and useful in some + * cases to cause some retraining. To do this right, we reset IBC + * as well. + */ +static void ipath_ht_xgxs_reset(struct ipath_devdata *dd) +{ + u64 val, prev_val; + + prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + val = prev_val | INFINIPATH_XGXS_RESET; + prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control & ~INFINIPATH_C_LINKENABLE); + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); + ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val); + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control); +} + + +static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which) +{ + int ret; + + switch (which) { + case IPATH_IB_CFG_LWID: + ret = dd->ipath_link_width_active; + break; + case IPATH_IB_CFG_SPD: + ret = dd->ipath_link_speed_active; + break; + case IPATH_IB_CFG_LWID_ENB: + ret = dd->ipath_link_width_enabled; + break; + case IPATH_IB_CFG_SPD_ENB: + ret = dd->ipath_link_speed_enabled; + break; + default: + ret = -ENOTSUPP; + break; + } + return ret; +} + + +/* we assume range checking is already done, if needed */ +static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val) +{ + int ret = 0; + + if (which == IPATH_IB_CFG_LWID_ENB) + dd->ipath_link_width_enabled = val; + else if (which == IPATH_IB_CFG_SPD_ENB) + dd->ipath_link_speed_enabled = val; + else + ret = -ENOTSUPP; + return ret; +} + + +static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b) +{ +} + + +static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) +{ + ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs), + ipath_ib_linktrstate(dd, ibcs)); + return 0; +} + + /** * ipath_init_iba6110_funcs - set up the chip-specific function pointers * @dd: the infinipath device @@ -1626,22 +1926,19 @@ void ipath_init_iba6110_funcs(struct ipath_devdata *dd) dd->ipath_f_setextled = ipath_setup_ht_setextled; dd->ipath_f_get_base_info = ipath_ht_get_base_info; dd->ipath_f_free_irq = ipath_ht_free_irq; - - /* - * initialize chip-specific variables - */ dd->ipath_f_tidtemplate = ipath_ht_tidtemplate; + dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback; + dd->ipath_f_get_msgheader = ipath_ht_get_msgheader; + dd->ipath_f_config_ports = ipath_ht_config_ports; + dd->ipath_f_read_counters = ipath_ht_read_counters; + dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset; + dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg; + dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg; + dd->ipath_f_config_jint = ipath_ht_config_jint; + dd->ipath_f_ib_updown = ipath_ht_ib_updown; /* - * setup the register offsets, since they are different for each - * chip - */ - dd->ipath_kregs = &ipath_ht_kregs; - dd->ipath_cregs = &ipath_ht_cregs; - - /* - * do very early init that is needed before ipath_f_bus is - * called + * initialize chip-specific variables */ ipath_init_ht_variables(dd); } diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 0103d6f4847b..c7a2f50824c0 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -145,10 +145,57 @@ struct _infinipath_do_not_use_kernel_regs { unsigned long long Reserved12; }; -#define IPATH_KREG_OFFSET(field) (offsetof(struct \ - _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) +struct _infinipath_do_not_use_counters { + __u64 LBIntCnt; + __u64 LBFlowStallCnt; + __u64 Reserved1; + __u64 TxUnsupVLErrCnt; + __u64 TxDataPktCnt; + __u64 TxFlowPktCnt; + __u64 TxDwordCnt; + __u64 TxLenErrCnt; + __u64 TxMaxMinLenErrCnt; + __u64 TxUnderrunCnt; + __u64 TxFlowStallCnt; + __u64 TxDroppedPktCnt; + __u64 RxDroppedPktCnt; + __u64 RxDataPktCnt; + __u64 RxFlowPktCnt; + __u64 RxDwordCnt; + __u64 RxLenErrCnt; + __u64 RxMaxMinLenErrCnt; + __u64 RxICRCErrCnt; + __u64 RxVCRCErrCnt; + __u64 RxFlowCtrlErrCnt; + __u64 RxBadFormatCnt; + __u64 RxLinkProblemCnt; + __u64 RxEBPCnt; + __u64 RxLPCRCErrCnt; + __u64 RxBufOvflCnt; + __u64 RxTIDFullErrCnt; + __u64 RxTIDValidErrCnt; + __u64 RxPKeyMismatchCnt; + __u64 RxP0HdrEgrOvflCnt; + __u64 RxP1HdrEgrOvflCnt; + __u64 RxP2HdrEgrOvflCnt; + __u64 RxP3HdrEgrOvflCnt; + __u64 RxP4HdrEgrOvflCnt; + __u64 RxP5HdrEgrOvflCnt; + __u64 RxP6HdrEgrOvflCnt; + __u64 RxP7HdrEgrOvflCnt; + __u64 RxP8HdrEgrOvflCnt; + __u64 Reserved6; + __u64 Reserved7; + __u64 IBStatusChangeCnt; + __u64 IBLinkErrRecoveryCnt; + __u64 IBLinkDownedCnt; + __u64 IBSymbolErrCnt; +}; + +#define IPATH_KREG_OFFSET(field) (offsetof( \ + struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) #define IPATH_CREG_OFFSET(field) (offsetof( \ - struct infinipath_counters, field) / sizeof(u64)) + struct _infinipath_do_not_use_counters, field) / sizeof(u64)) static const struct ipath_kregs ipath_pe_kregs = { .kr_control = IPATH_KREG_OFFSET(Control), @@ -282,6 +329,9 @@ static const struct ipath_cregs ipath_pe_cregs = { #define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL #define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL +#define IBA6120_IBCS_LINKTRAININGSTATE_MASK 0xf +#define IBA6120_IBCS_LINKSTATE_SHIFT 4 + /* kr_extstatus bits */ #define INFINIPATH_EXTS_FREQSEL 0x2 #define INFINIPATH_EXTS_SERDESSEL 0x4 @@ -296,6 +346,9 @@ static const struct ipath_cregs ipath_pe_cregs = { #define IPATH_GPIO_SCL (1ULL << \ (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) +#define INFINIPATH_R_INTRAVAIL_SHIFT 16 +#define INFINIPATH_R_TAILUPD_SHIFT 31 + /* 6120 specific hardware errors... */ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), @@ -320,10 +373,28 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) -static int ipath_pe_txe_recover(struct ipath_devdata *); static void ipath_pe_put_tid_2(struct ipath_devdata *, u64 __iomem *, u32, unsigned long); +/* + * On platforms using this chip, and not having ordered WC stores, we + * can get TXE parity errors due to speculative reads to the PIO buffers, + * and this, due to a chip bug can result in (many) false parity error + * reports. So it's a debug print on those, and an info print on systems + * where the speculative reads don't occur. + */ +static void ipath_pe_txe_recover(struct ipath_devdata *dd) +{ + if (ipath_unordered_wc()) + ipath_dbg("Recovering from TXE PIO parity error\n"); + else { + ++ipath_stats.sps_txeparity; + dev_info(&dd->pcidev->dev, + "Recovering from TXE PIO parity error\n"); + } +} + + /** * ipath_pe_handle_hwerrors - display hardware errors. * @dd: the infinipath device @@ -403,35 +474,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, * occur if a processor speculative read is done to the PIO * buffer while we are sending a packet, for example. */ - if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd)) + if (hwerrs & TXE_PIO_PARITY) { + ipath_pe_txe_recover(dd); hwerrs &= ~TXE_PIO_PARITY; - if (hwerrs) { - /* - * if any set that we aren't ignoring only make the - * complaint once, in case it's stuck or recurring, - * and we get here multiple times - * Force link down, so switch knows, and - * LEDs are turned off - */ - if (dd->ipath_flags & IPATH_INITTED) { - ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); - ipath_setup_pe_setextled(dd, - INFINIPATH_IBCS_L_STATE_DOWN, - INFINIPATH_IBCS_LT_STATE_DISABLED); - ipath_dev_err(dd, "Fatal Hardware Error (freeze " - "mode), no longer usable, SN %.16s\n", - dd->ipath_serial); - isfatal = 1; - } - /* - * Mark as having had an error for driver, and also - * for /sys and status word mapped to user programs. - * This marks unit as not usable, until reset - */ - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - dd->ipath_flags &= ~IPATH_INITTED; - } else { + } + if (!hwerrs) { static u32 freeze_cnt; freeze_cnt++; @@ -485,7 +532,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) { /* - * If it occurs, it is left masked since the eternal + * If it occurs, it is left masked since the external * interface is unused */ dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED; @@ -563,6 +610,14 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name, dd->ipath_f_put_tid = ipath_pe_put_tid_2; } + + /* + * set here, not in ipath_init_*_funcs because we have to do + * it after we can read chip registers. + */ + dd->ipath_ureg_align = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign); + return ret; } @@ -667,17 +722,8 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); prev_val = val; - if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) & - INFINIPATH_XGXS_MDIOADDR_MASK) != 3) { - val &= - ~(INFINIPATH_XGXS_MDIOADDR_MASK << - INFINIPATH_XGXS_MDIOADDR_SHIFT); - /* MDIO address 3 */ - val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT; - } - if (val & INFINIPATH_XGXS_RESET) { + if (val & INFINIPATH_XGXS_RESET) val &= ~INFINIPATH_XGXS_RESET; - } if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { /* need to compensate for Tx inversion in partner */ @@ -707,21 +753,6 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) (unsigned long long) ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); - if (!ipath_waitfor_mdio_cmdready(dd)) { - ipath_write_kreg( - dd, dd->ipath_kregs->kr_mdio, - ipath_mdio_req(IPATH_MDIO_CMD_READ, 31, - IPATH_MDIO_CTRL_XGXS_REG_8, 0)); - if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio, - IPATH_MDIO_DATAVALID, &val)) - ipath_dbg("Never got MDIO data for XGXS " - "status read\n"); - else - ipath_cdbg(VERBOSE, "MDIO Read reg8, " - "'bank' 31 %x\n", (u32) val); - } else - ipath_dbg("Never got MDIO cmdready for XGXS status read\n"); - return ret; } @@ -902,12 +933,27 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd, else ipath_dev_err(dd, "Can't find PCI Express " "capability!\n"); + + dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; + dd->ipath_link_speed_supported = IPATH_IB_SDR; + dd->ipath_link_width_enabled = IB_WIDTH_4X; + dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported; + /* these can't change for this chip, so set once */ + dd->ipath_link_width_active = dd->ipath_link_width_enabled; + dd->ipath_link_speed_active = dd->ipath_link_speed_enabled; return 0; } static void ipath_init_pe_variables(struct ipath_devdata *dd) { /* + * setup the register offsets, since they are different for each + * chip + */ + dd->ipath_kregs = &ipath_pe_kregs; + dd->ipath_cregs = &ipath_pe_cregs; + + /* * bits for selecting i2c direction and values, * used for I2C serial flash */ @@ -916,6 +962,43 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd) dd->ipath_gpio_sda = IPATH_GPIO_SDA; dd->ipath_gpio_scl = IPATH_GPIO_SCL; + /* + * Fill in data for field-values that change in newer chips. + * We dynamically specify only the mask for LINKTRAININGSTATE + * and only the shift for LINKSTATE, as they are the only ones + * that change. Also precalculate the 3 link states of interest + * and the combined mask. + */ + dd->ibcs_ls_shift = IBA6120_IBCS_LINKSTATE_SHIFT; + dd->ibcs_lts_mask = IBA6120_IBCS_LINKTRAININGSTATE_MASK; + dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK << + dd->ibcs_ls_shift) | dd->ibcs_lts_mask; + dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift); + dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift); + dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift); + + /* + * Fill in data for ibcc field-values that change in newer chips. + * We dynamically specify only the mask for LINKINITCMD + * and only the shift for LINKCMD and MAXPKTLEN, as they are + * the only ones that change. + */ + dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK; + dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT; + dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT; + + /* Fill in shifts for RcvCtrl. */ + dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT; + dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT; + dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT; + dd->ipath_r_portcfg_shift = 0; /* Not on IBA6120 */ + /* variables for sanity checking interrupt and errors */ dd->ipath_hwe_bitsextant = (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << @@ -963,6 +1046,8 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd) dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT; + dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT; /* * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. @@ -984,6 +1069,7 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd) INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET; + dd->delay_mult = 2; /* SDR, 4X, can't change */ } /* setup the MSI stuff again after a reset. I'd like to just call @@ -1289,6 +1375,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) */ dd->ipath_rcvhdrentsize = 24; dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; + dd->ipath_rhf_offset = 0; + dd->ipath_egrtidbase = (u64 __iomem *) + ((char __iomem *) dd->ipath_kregbase + dd->ipath_rcvegrbase); /* * To truly support a 4KB MTU (for usermode), we need to @@ -1359,34 +1448,204 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd) dd->ipath_irq = 0; } + +static struct ipath_message_header * +ipath_pe_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) +{ + return (struct ipath_message_header *) + &rhf_addr[sizeof(u64) / sizeof(u32)]; +} + +static void ipath_pe_config_ports(struct ipath_devdata *dd, ushort cfgports) +{ + dd->ipath_portcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); + dd->ipath_p0_rcvegrcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); +} + +static void ipath_pe_read_counters(struct ipath_devdata *dd, + struct infinipath_counters *cntrs) +{ + cntrs->LBIntCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt)); + cntrs->LBFlowStallCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt)); + cntrs->TxSDmaDescCnt = 0; + cntrs->TxUnsupVLErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt)); + cntrs->TxDataPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt)); + cntrs->TxFlowPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt)); + cntrs->TxDwordCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt)); + cntrs->TxLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt)); + cntrs->TxMaxMinLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt)); + cntrs->TxUnderrunCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt)); + cntrs->TxFlowStallCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt)); + cntrs->TxDroppedPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt)); + cntrs->RxDroppedPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt)); + cntrs->RxDataPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt)); + cntrs->RxFlowPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt)); + cntrs->RxDwordCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt)); + cntrs->RxLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt)); + cntrs->RxMaxMinLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt)); + cntrs->RxICRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt)); + cntrs->RxVCRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt)); + cntrs->RxFlowCtrlErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt)); + cntrs->RxBadFormatCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt)); + cntrs->RxLinkProblemCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt)); + cntrs->RxEBPCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt)); + cntrs->RxLPCRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt)); + cntrs->RxBufOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt)); + cntrs->RxTIDFullErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt)); + cntrs->RxTIDValidErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt)); + cntrs->RxPKeyMismatchCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt)); + cntrs->RxP0HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt)); + cntrs->RxP1HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt)); + cntrs->RxP2HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt)); + cntrs->RxP3HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt)); + cntrs->RxP4HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt)); + cntrs->RxP5HdrEgrOvflCnt = 0; + cntrs->RxP6HdrEgrOvflCnt = 0; + cntrs->RxP7HdrEgrOvflCnt = 0; + cntrs->RxP8HdrEgrOvflCnt = 0; + cntrs->RxP9HdrEgrOvflCnt = 0; + cntrs->RxP10HdrEgrOvflCnt = 0; + cntrs->RxP11HdrEgrOvflCnt = 0; + cntrs->RxP12HdrEgrOvflCnt = 0; + cntrs->RxP13HdrEgrOvflCnt = 0; + cntrs->RxP14HdrEgrOvflCnt = 0; + cntrs->RxP15HdrEgrOvflCnt = 0; + cntrs->RxP16HdrEgrOvflCnt = 0; + cntrs->IBStatusChangeCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt)); + cntrs->IBLinkErrRecoveryCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt)); + cntrs->IBLinkDownedCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt)); + cntrs->IBSymbolErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt)); + cntrs->RxVL15DroppedPktCnt = 0; + cntrs->RxOtherLocalPhyErrCnt = 0; + cntrs->PcieRetryBufDiagQwordCnt = 0; + cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs; + cntrs->LocalLinkIntegrityErrCnt = dd->ipath_lli_errs; + cntrs->RxVlErrCnt = 0; + cntrs->RxDlidFltrCnt = 0; +} + + +/* no interrupt fallback for these chips */ +static int ipath_pe_nointr_fallback(struct ipath_devdata *dd) +{ + return 0; +} + + /* - * On platforms using this chip, and not having ordered WC stores, we - * can get TXE parity errors due to speculative reads to the PIO buffers, - * and this, due to a chip bug can result in (many) false parity error - * reports. So it's a debug print on those, and an info print on systems - * where the speculative reads don't occur. - * Because we can get lots of false errors, we have no upper limit - * on recovery attempts on those platforms. + * reset the XGXS (between serdes and IBC). Slightly less intrusive + * than resetting the IBC or external link state, and useful in some + * cases to cause some retraining. To do this right, we reset IBC + * as well. */ -static int ipath_pe_txe_recover(struct ipath_devdata *dd) +static void ipath_pe_xgxs_reset(struct ipath_devdata *dd) { - if (ipath_unordered_wc()) - ipath_dbg("Recovering from TXE PIO parity error\n"); - else { - int cnt = ++ipath_stats.sps_txeparity; - if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) { - if (cnt == IPATH_MAX_PARITY_ATTEMPTS) - ipath_dev_err(dd, - "Too many attempts to recover from " - "TXE parity, giving up\n"); - return 0; - } - dev_info(&dd->pcidev->dev, - "Recovering from TXE PIO parity error\n"); + u64 val, prev_val; + + prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + val = prev_val | INFINIPATH_XGXS_RESET; + prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control & ~INFINIPATH_C_LINKENABLE); + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); + ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val); + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control); +} + + +static int ipath_pe_get_ib_cfg(struct ipath_devdata *dd, int which) +{ + int ret; + + switch (which) { + case IPATH_IB_CFG_LWID: + ret = dd->ipath_link_width_active; + break; + case IPATH_IB_CFG_SPD: + ret = dd->ipath_link_speed_active; + break; + case IPATH_IB_CFG_LWID_ENB: + ret = dd->ipath_link_width_enabled; + break; + case IPATH_IB_CFG_SPD_ENB: + ret = dd->ipath_link_speed_enabled; + break; + default: + ret = -ENOTSUPP; + break; } - return 1; + return ret; +} + + +/* we assume range checking is already done, if needed */ +static int ipath_pe_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val) +{ + int ret = 0; + + if (which == IPATH_IB_CFG_LWID_ENB) + dd->ipath_link_width_enabled = val; + else if (which == IPATH_IB_CFG_SPD_ENB) + dd->ipath_link_speed_enabled = val; + else + ret = -ENOTSUPP; + return ret; } +static void ipath_pe_config_jint(struct ipath_devdata *dd, u16 a, u16 b) +{ +} + + +static int ipath_pe_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) +{ + ipath_setup_pe_setextled(dd, ipath_ib_linkstate(dd, ibcs), + ipath_ib_linktrstate(dd, ibcs)); + return 0; +} + + /** * ipath_init_iba6120_funcs - set up the chip-specific function pointers * @dd: the infinipath device @@ -1407,7 +1666,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd) dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes; dd->ipath_f_clear_tids = ipath_pe_clear_tids; /* - * this may get changed after we read the chip revision, + * _f_put_tid may get changed after we read the chip revision, * but we start with the safe version for all revs */ dd->ipath_f_put_tid = ipath_pe_put_tid; @@ -1415,17 +1674,19 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd) dd->ipath_f_setextled = ipath_setup_pe_setextled; dd->ipath_f_get_base_info = ipath_pe_get_base_info; dd->ipath_f_free_irq = ipath_pe_free_irq; - - /* initialize chip-specific variables */ dd->ipath_f_tidtemplate = ipath_pe_tidtemplate; + dd->ipath_f_intr_fallback = ipath_pe_nointr_fallback; + dd->ipath_f_xgxs_reset = ipath_pe_xgxs_reset; + dd->ipath_f_get_msgheader = ipath_pe_get_msgheader; + dd->ipath_f_config_ports = ipath_pe_config_ports; + dd->ipath_f_read_counters = ipath_pe_read_counters; + dd->ipath_f_get_ib_cfg = ipath_pe_get_ib_cfg; + dd->ipath_f_set_ib_cfg = ipath_pe_set_ib_cfg; + dd->ipath_f_config_jint = ipath_pe_config_jint; + dd->ipath_f_ib_updown = ipath_pe_ib_updown; - /* - * setup the register offsets, since they are different for each - * chip - */ - dd->ipath_kregs = &ipath_pe_kregs; - dd->ipath_cregs = &ipath_pe_cregs; + /* initialize chip-specific variables */ ipath_init_pe_variables(dd); } diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 9dd0bacf8461..4471674975cd 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -91,7 +91,7 @@ static int create_port0_egr(struct ipath_devdata *dd) struct ipath_skbinfo *skbinfo; int ret; - egrcnt = dd->ipath_rcvegrcnt; + egrcnt = dd->ipath_p0_rcvegrcnt; skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt); if (skbinfo == NULL) { @@ -244,8 +244,7 @@ static int init_chip_first(struct ipath_devdata *dd, * cfgports. We do still check and report a difference, if * not same (should be impossible). */ - dd->ipath_portcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); + dd->ipath_f_config_ports(dd, ipath_cfgports); if (!ipath_cfgports) dd->ipath_cfgports = dd->ipath_portcnt; else if (ipath_cfgports <= dd->ipath_portcnt) { @@ -272,22 +271,7 @@ static int init_chip_first(struct ipath_devdata *dd, goto done; } - dd->ipath_lastegrheads = kzalloc(sizeof(*dd->ipath_lastegrheads) - * dd->ipath_cfgports, - GFP_KERNEL); - dd->ipath_lastrcvhdrqtails = - kzalloc(sizeof(*dd->ipath_lastrcvhdrqtails) - * dd->ipath_cfgports, GFP_KERNEL); - - if (!dd->ipath_lastegrheads || !dd->ipath_lastrcvhdrqtails) { - ipath_dev_err(dd, "Unable to allocate head arrays, " - "failing\n"); - ret = -ENOMEM; - goto done; - } - pd = create_portdata0(dd); - if (!pd) { ipath_dev_err(dd, "Unable to allocate portdata for port " "0, failing\n"); @@ -345,10 +329,10 @@ static int init_chip_first(struct ipath_devdata *dd, dd->ipath_piobcnt2k, dd->ipath_pio2kbase); spin_lock_init(&dd->ipath_tid_lock); - + spin_lock_init(&dd->ipath_sendctrl_lock); spin_lock_init(&dd->ipath_gpio_lock); spin_lock_init(&dd->ipath_eep_st_lock); - sema_init(&dd->ipath_eep_sem, 1); + mutex_init(&dd->ipath_eep_lock); done: *pdp = pd; @@ -372,9 +356,9 @@ static int init_chip_reset(struct ipath_devdata *dd, *pdp = dd->ipath_pd[0]; /* ensure chip does no sends or receives while we re-initialize */ dd->ipath_control = dd->ipath_sendctrl = dd->ipath_rcvctrl = 0U; - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, dd->ipath_control); rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); if (dd->ipath_portcnt != rtmp) @@ -487,6 +471,7 @@ static void enable_chip(struct ipath_devdata *dd, struct ipath_portdata *pd, int reinit) { u32 val; + unsigned long flags; int i; if (!reinit) @@ -495,19 +480,21 @@ static void enable_chip(struct ipath_devdata *dd, ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); /* Enable PIO send, and update of PIOavail regs to memory. */ dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE | INFINIPATH_S_PIOBUFAVAILUPD; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); /* * enable port 0 receive, and receive interrupt. other ports * done as user opens and inits them. */ - dd->ipath_rcvctrl = INFINIPATH_R_TAILUPD | - (1ULL << INFINIPATH_R_PORTENABLE_SHIFT) | - (1ULL << INFINIPATH_R_INTRAVAIL_SHIFT); + dd->ipath_rcvctrl = (1ULL << dd->ipath_r_tailupd_shift) | + (1ULL << dd->ipath_r_portenable_shift) | + (1ULL << dd->ipath_r_intravail_shift); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); @@ -523,12 +510,11 @@ static void enable_chip(struct ipath_devdata *dd, */ val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0); (void)ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0); - dd->ipath_port0head = ipath_read_ureg32(dd, ur_rcvhdrtail, 0); /* Initialize so we interrupt on next packet received */ (void)ipath_write_ureg(dd, ur_rcvhdrhead, dd->ipath_rhdrhead_intr_off | - dd->ipath_port0head, 0); + dd->ipath_pd[0]->port_head, 0); /* * by now pioavail updates to memory should have occurred, so @@ -542,12 +528,8 @@ static void enable_chip(struct ipath_devdata *dd, /* * Chip Errata bug 6641; even and odd qwords>3 are swapped. */ - if (i > 3) { - if (i & 1) - val = dd->ipath_pioavailregs_dma[i - 1]; - else - val = dd->ipath_pioavailregs_dma[i + 1]; - } + if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) + val = dd->ipath_pioavailregs_dma[i ^ 1]; else val = dd->ipath_pioavailregs_dma[i]; dd->ipath_pioavailshadow[i] = le64_to_cpu(val); @@ -690,12 +672,13 @@ done: */ int ipath_init_chip(struct ipath_devdata *dd, int reinit) { - int ret = 0, i; + int ret = 0; u32 val32, kpiobufs; u32 piobufs, uports; u64 val; struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ gfp_t gfp_flags = GFP_USER | __GFP_COMP; + unsigned long flags; ret = init_housekeeping(dd, &pd, reinit); if (ret) @@ -746,7 +729,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) kpiobufs = ipath_kpiobufs; if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) { - i = (int) piobufs - + int i = (int) piobufs - (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); if (i < 0) i = 0; @@ -827,8 +810,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_PIOENABLE); + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); /* * before error clears, since we expect serdes pll errors during diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index c61f9da2964a..92e58c921522 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -683,7 +683,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) for (i = 0; i < dd->ipath_cfgports; i++) { struct ipath_portdata *pd = dd->ipath_pd[i]; if (i == 0) { - hd = dd->ipath_port0head; + hd = pd->port_head; tl = (u32) le64_to_cpu( *dd->ipath_hdrqtailptr); } else if (pd && pd->port_cnt && @@ -693,7 +693,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * except kernel */ tl = *(u64 *) pd->port_rcvhdrtail_kvaddr; - if (tl == dd->ipath_lastrcvhdrqtails[i]) + if (tl == pd->port_lastrcvhdrqtail) continue; hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i); @@ -703,7 +703,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) (!hd && tl == dd->ipath_hdrqlast)) { if (i == 0) chkerrpkts = 1; - dd->ipath_lastrcvhdrqtails[i] = tl; + pd->port_lastrcvhdrqtail = tl; pd->port_hdrqfull++; /* flush hdrqfull so that poll() sees it */ wmb(); @@ -712,6 +712,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) } } if (errs & INFINIPATH_E_RRCVEGRFULL) { + struct ipath_portdata *pd = dd->ipath_pd[0]; + /* * since this is of less importance and not likely to * happen without also getting hdrfull, only count @@ -719,7 +721,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * vs user) */ ipath_stats.sps_etidfull++; - if (dd->ipath_port0head != + if (pd->port_head != (u32) le64_to_cpu(*dd->ipath_hdrqtailptr)) chkerrpkts = 1; } @@ -795,6 +797,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd) { int i, im; __le64 val; + unsigned long flags; /* disable error interrupts, to avoid confusion */ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); @@ -813,11 +816,14 @@ void ipath_clear_freeze(struct ipath_devdata *dd) dd->ipath_control); /* ensure pio avail updates continue */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD); ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); + dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); /* * We just enabled pioavailupdate, so dma copy is almost certainly @@ -825,8 +831,8 @@ void ipath_clear_freeze(struct ipath_devdata *dd) */ for (i = 0; i < dd->ipath_pioavregs; i++) { /* deal with 6110 chip bug */ - im = i > 3 ? ((i&1) ? i-1 : i+1) : i; - val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im); + im = i > 3 ? i ^ 1 : i; + val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im); dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i] = le64_to_cpu(val); } @@ -849,7 +855,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd) /* this is separate to allow for better optimization of ipath_intr() */ -static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) +static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp) { /* * sometimes happen during driver init and unload, don't want @@ -877,7 +883,7 @@ static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) dd->ipath_f_free_irq(dd); } } - if (ipath_read_kreg32(dd, dd->ipath_kregs->kr_intmask)) { + if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) { ipath_dev_err(dd, "%u unexpected interrupts, " "disabling interrupts completely\n", *unexpectp); @@ -892,7 +898,7 @@ static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) "ignoring\n"); } -static void ipath_bad_regread(struct ipath_devdata *dd) +static noinline void ipath_bad_regread(struct ipath_devdata *dd) { static int allbits; @@ -920,31 +926,9 @@ static void ipath_bad_regread(struct ipath_devdata *dd) } } -static void handle_port_pioavail(struct ipath_devdata *dd) -{ - u32 i; - /* - * start from port 1, since for now port 0 is never using - * wait_event for PIO - */ - for (i = 1; dd->ipath_portpiowait && i < dd->ipath_cfgports; i++) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - - if (pd && pd->port_cnt && - dd->ipath_portpiowait & (1U << i)) { - clear_bit(i, &dd->ipath_portpiowait); - if (test_bit(IPATH_PORT_WAITING_PIO, - &pd->port_flag)) { - clear_bit(IPATH_PORT_WAITING_PIO, - &pd->port_flag); - wake_up_interruptible(&pd->port_wait); - } - } - } -} - static void handle_layer_pioavail(struct ipath_devdata *dd) { + unsigned long flags; int ret; ret = ipath_ib_piobufavail(dd->verbs_dev); @@ -953,9 +937,12 @@ static void handle_layer_pioavail(struct ipath_devdata *dd) return; set: - set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); } /* @@ -969,7 +956,15 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) int i; int rcvdint = 0; - /* test_bit below needs this... */ + /* + * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and + * test_and_clear_bit(IPATH_PORT_WAITING_URG) below + * would both like timely updates of the bits so that + * we don't pass them by unnecessarily. the rmb() + * here ensures that we see them promptly -- the + * corresponding wmb()'s are in ipath_poll_urgent() + * and ipath_poll_next()... + */ rmb(); portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) & dd->ipath_i_rcvavail_mask) @@ -980,7 +975,7 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) if (portr & (1 << i) && pd && pd->port_cnt) { if (test_and_clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { - clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, + clear_bit(i + dd->ipath_r_intravail_shift, &dd->ipath_rcvctrl); wake_up_interruptible(&pd->port_wait); rcvdint = 1; @@ -1039,7 +1034,7 @@ irqreturn_t ipath_intr(int irq, void *data) goto bail; } - istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus); + istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus); if (unlikely(!istat)) { ipath_stats.sps_nullintr++; @@ -1180,7 +1175,7 @@ irqreturn_t ipath_intr(int irq, void *data) * for receive are at the bottom. */ if (chk0rcv) { - ipath_kreceive(dd); + ipath_kreceive(dd->ipath_pd[0]); istat &= ~port0rbits; } @@ -1191,12 +1186,14 @@ irqreturn_t ipath_intr(int irq, void *data) handle_urcv(dd, istat); if (istat & INFINIPATH_I_SPIOBUFAVAIL) { - clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - - if (dd->ipath_portpiowait) - handle_port_pioavail(dd); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); handle_layer_pioavail(dd); } diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 8786dd7922e4..4cc0f95ea877 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -41,6 +41,7 @@ #include <linux/interrupt.h> #include <linux/pci.h> #include <linux/dma-mapping.h> +#include <linux/mutex.h> #include <asm/io.h> #include <rdma/ib_verbs.h> @@ -140,6 +141,11 @@ struct ipath_portdata { u32 port_pionowait; /* total number of rcvhdrqfull errors */ u32 port_hdrqfull; + /* + * Used to suppress multiple instances of same + * port staying stuck at same point. + */ + u32 port_lastrcvhdrqtail; /* saved total number of rcvhdrqfull errors for poll edge trigger */ u32 port_hdrqfull_poll; /* total number of polled urgent packets */ @@ -148,6 +154,7 @@ struct ipath_portdata { u32 port_urgent_poll; /* pid of process using this port */ pid_t port_pid; + pid_t port_subpid[INFINIPATH_MAX_SUBPORT]; /* same size as task_struct .comm[] */ char port_comm[16]; /* pkeys set by this use of this port */ @@ -166,6 +173,8 @@ struct ipath_portdata { u32 active_slaves; /* Type of packets or conditions we want to poll for */ u16 poll_type; + /* port rcvhdrq head offset */ + u32 port_head; }; struct sk_buff; @@ -182,6 +191,22 @@ struct ipath_skbinfo { dma_addr_t phys; }; +/* + * Possible IB config parameters for ipath_f_get/set_ib_cfg() + */ +#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */ +#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */ +#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */ +#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */ +#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */ +#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */ +#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */ +#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */ +#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */ +#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */ +#define IPATH_IB_CFG_LINKLATENCY 8 /* Get Auto-Lane-reversal enable */ + + struct ipath_devdata { struct list_head ipath_list; @@ -222,6 +247,8 @@ struct ipath_devdata { struct _ipath_layer ipath_layer; /* setup intr */ int (*ipath_f_intrsetup)(struct ipath_devdata *); + /* fallback to alternate interrupt type if possible */ + int (*ipath_f_intr_fallback)(struct ipath_devdata *); /* setup on-chip bus config */ int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *); /* hard reset chip */ @@ -244,6 +271,18 @@ struct ipath_devdata { int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); /* free irq */ void (*ipath_f_free_irq)(struct ipath_devdata *); + struct ipath_message_header *(*ipath_f_get_msgheader) + (struct ipath_devdata *, __le32 *); + void (*ipath_f_config_ports)(struct ipath_devdata *, ushort); + int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int); + int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32); + void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16); + void (*ipath_f_read_counters)(struct ipath_devdata *, + struct infinipath_counters *); + void (*ipath_f_xgxs_reset)(struct ipath_devdata *); + /* per chip actions needed for IB Link up/down changes */ + int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64); + struct ipath_ibdev *verbs_dev; struct timer_list verbs_timer; /* total dwords sent (summed from counter) */ @@ -313,22 +352,12 @@ struct ipath_devdata { * supports, less gives more pio bufs/port, etc. */ u32 ipath_cfgports; - /* port0 rcvhdrq head offset */ - u32 ipath_port0head; /* count of port 0 hdrqfull errors */ u32 ipath_p0_hdrqfull; + /* port 0 number of receive eager buffers */ + u32 ipath_p0_rcvegrcnt; /* - * (*cfgports) used to suppress multiple instances of same - * port staying stuck at same point - */ - u32 *ipath_lastrcvhdrqtails; - /* - * (*cfgports) used to suppress multiple instances of same - * port staying stuck at same point - */ - u32 *ipath_lastegrheads; - /* * index of last piobuffer we used. Speeds up searching, by * starting at this point. Doesn't matter if multiple cpu's use and * update, last updater is only write that matters. Whenever it @@ -367,14 +396,15 @@ struct ipath_devdata { unsigned long ipath_wc_len; /* ref count for each pkey */ atomic_t ipath_pkeyrefs[4]; - /* shadow copy of all exptids physaddr; used only by funcsim */ - u64 *ipath_tidsimshadow; /* shadow copy of struct page *'s for exp tid pages */ struct page **ipath_pageshadow; /* shadow copy of dma handles for exp tid pages */ dma_addr_t *ipath_physshadow; - /* lock to workaround chip bug 9437 */ + u64 __iomem *ipath_egrtidbase; + /* lock to workaround chip bug 9437 and others */ + spinlock_t ipath_kernel_tid_lock; spinlock_t ipath_tid_lock; + spinlock_t ipath_sendctrl_lock; /* * IPATH_STATUS_*, @@ -395,6 +425,8 @@ struct ipath_devdata { void *ipath_dummy_hdrq; /* used after port close */ dma_addr_t ipath_dummy_hdrq_phys; + unsigned long ipath_ureg_align; /* user register alignment */ + /* * Shadow copies of registers; size indicates read access size. * Most of them are readonly, but some are write-only register, @@ -456,8 +488,6 @@ struct ipath_devdata { unsigned long ipath_rcvctrl; /* shadow kr_sendctrl */ unsigned long ipath_sendctrl; - /* ports waiting for PIOavail intr */ - unsigned long ipath_portpiowait; unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */ /* value we put in kr_rcvhdrcnt */ @@ -550,12 +580,26 @@ struct ipath_devdata { u8 ipath_minrev; /* board rev, from ipath_revision */ u8 ipath_boardrev; + + u8 ipath_r_portenable_shift; + u8 ipath_r_intravail_shift; + u8 ipath_r_tailupd_shift; + u8 ipath_r_portcfg_shift; + /* unit # of this chip, if present */ int ipath_unit; /* saved for restore after reset */ u8 ipath_pci_cacheline; /* LID mask control */ u8 ipath_lmc; + /* link width supported */ + u8 ipath_link_width_supported; + /* link speed supported */ + u8 ipath_link_speed_supported; + u8 ipath_link_width_enabled; + u8 ipath_link_speed_enabled; + u8 ipath_link_width_active; + u8 ipath_link_speed_active; /* Rx Polarity inversion (compensate for ~tx on partner) */ u8 ipath_rx_pol_inv; @@ -590,6 +634,8 @@ struct ipath_devdata { */ u32 ipath_i_rcvavail_mask; u32 ipath_i_rcvurg_mask; + u16 ipath_i_rcvurg_shift; + u16 ipath_i_rcvavail_shift; /* * Register bits for selecting i2c direction and values, used for @@ -603,6 +649,29 @@ struct ipath_devdata { /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */ spinlock_t ipath_gpio_lock; + /* + * IB link and linktraining states and masks that vary per chip in + * some way. Set at init, to avoid each IB status change interrupt + */ + u8 ibcs_ls_shift; + u8 ibcs_lts_mask; + u32 ibcs_mask; + u32 ib_init; + u32 ib_arm; + u32 ib_active; + + u16 ipath_rhf_offset; /* offset of RHF within receive header entry */ + + /* + * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontol + * reg. Changes for IBA7220 + */ + u8 ibcc_lic_mask; /* LinkInitCmd */ + u8 ibcc_lc_shift; /* LinkCmd */ + u8 ibcc_mpl_shift; /* Maxpktlen */ + + u8 delay_mult; + /* used to override LED behavior */ u8 ipath_led_override; /* Substituted for normal value, if non-zero */ u16 ipath_led_override_timeoff; /* delta to next timer event */ @@ -616,7 +685,7 @@ struct ipath_devdata { /* control access to actual counters, timer */ spinlock_t ipath_eep_st_lock; /* control high-level access to EEPROM */ - struct semaphore ipath_eep_sem; + struct mutex ipath_eep_lock; /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */ uint64_t ipath_traffic_wds; /* active time is kept in seconds, but logged in hours */ @@ -630,6 +699,10 @@ struct ipath_devdata { * each of the counters to increment. */ struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT]; + + /* interrupt mitigation reload register info */ + u16 ipath_jint_idle_ticks; /* idle clock ticks */ + u16 ipath_jint_max_packets; /* max packets across all ports */ }; /* Private data for file operations */ @@ -690,7 +763,7 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *); int ipath_parse_ushort(const char *str, unsigned short *valp); -void ipath_kreceive(struct ipath_devdata *); +void ipath_kreceive(struct ipath_portdata *); int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); int ipath_reset_device(int); void ipath_get_faststats(unsigned long); @@ -698,6 +771,8 @@ int ipath_set_linkstate(struct ipath_devdata *, u8); int ipath_set_mtu(struct ipath_devdata *, u16); int ipath_set_lid(struct ipath_devdata *, u32, u8); int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); +void ipath_enable_armlaunch(struct ipath_devdata *); +void ipath_disable_armlaunch(struct ipath_devdata *); /* for use in system calls, where we want to know device type, etc. */ #define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd @@ -744,9 +819,15 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); * are 64bit */ #define IPATH_32BITCOUNTERS 0x20000 /* can miss port0 rx interrupts */ + /* Interrupt register is 64 bits */ +#define IPATH_INTREG_64 0x40000 #define IPATH_DISABLED 0x80000 /* administratively disabled */ /* Use GPIO interrupts for new counters */ #define IPATH_GPIO_ERRINTRS 0x100000 +#define IPATH_SWAP_PIOBUFS 0x200000 + /* Suppress heartbeat, even if turning off loopback */ +#define IPATH_NO_HRTBT 0x1000000 +#define IPATH_HAS_MULT_IB_SPEED 0x8000000 /* Bits in GPIO for the added interrupts */ #define IPATH_GPIO_PORT0_BIT 2 @@ -758,8 +839,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); /* portdata flag bit offsets */ /* waiting for a packet to arrive */ #define IPATH_PORT_WAITING_RCV 2 - /* waiting for a PIO buffer to be available */ -#define IPATH_PORT_WAITING_PIO 3 /* master has not finished initializing */ #define IPATH_PORT_MASTER_UNINIT 4 /* waiting for an urgent packet to arrive */ @@ -767,8 +846,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); /* free up any allocated data at closes */ void ipath_free_data(struct ipath_portdata *dd); -int ipath_waitfor_mdio_cmdready(struct ipath_devdata *); -int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *); u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); void ipath_init_iba6120_funcs(struct ipath_devdata *); void ipath_init_iba6110_funcs(struct ipath_devdata *); @@ -792,33 +869,6 @@ void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val); */ #define IPATH_DFLT_RCVHDRSIZE 9 -#define IPATH_MDIO_CMD_WRITE 1 -#define IPATH_MDIO_CMD_READ 2 -#define IPATH_MDIO_CLD_DIV 25 /* to get 2.5 Mhz mdio clock */ -#define IPATH_MDIO_CMDVALID 0x40000000 /* bit 30 */ -#define IPATH_MDIO_DATAVALID 0x80000000 /* bit 31 */ -#define IPATH_MDIO_CTRL_STD 0x0 - -static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data) -{ - return (((u64) IPATH_MDIO_CLD_DIV) << 32) | - (cmd << 26) | - (dev << 21) | - (reg << 16) | - (data & 0xFFFF); -} - - /* signal and fifo status, in bank 31 */ -#define IPATH_MDIO_CTRL_XGXS_REG_8 0x8 - /* controls loopback, redundancy */ -#define IPATH_MDIO_CTRL_8355_REG_1 0x10 - /* premph, encdec, etc. */ -#define IPATH_MDIO_CTRL_8355_REG_2 0x11 - /* Kchars, etc. */ -#define IPATH_MDIO_CTRL_8355_REG_6 0x15 -#define IPATH_MDIO_CTRL_8355_REG_9 0x18 -#define IPATH_MDIO_CTRL_8355_REG_10 0x1D - int ipath_get_user_pages(unsigned long, size_t, struct page **); void ipath_release_user_pages(struct page **, size_t); void ipath_release_user_pages_on_close(struct page **, size_t); @@ -863,7 +913,7 @@ static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd, return readl(regno + (u64 __iomem *) (dd->ipath_uregbase + (char __iomem *)dd->ipath_kregbase + - dd->ipath_palign * port)); + dd->ipath_ureg_align * port)); } /** @@ -880,7 +930,7 @@ static inline void ipath_write_ureg(const struct ipath_devdata *dd, { u64 __iomem *ubase = (u64 __iomem *) (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase + - dd->ipath_palign * port); + dd->ipath_ureg_align * port); if (dd->ipath_kregbase) writeq(value, &ubase[regno]); } @@ -930,6 +980,53 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd, (char __iomem *)dd->ipath_kregbase)); } +static inline void ipath_write_creg(const struct ipath_devdata *dd, + ipath_creg regno, u64 value) +{ + if (dd->ipath_kregbase) + writeq(value, regno + (u64 __iomem *) + (dd->ipath_cregbase + + (char __iomem *)dd->ipath_kregbase)); +} + +static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd) +{ + *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL; +} + +static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd) +{ + return (u32) le64_to_cpu(*((volatile __le64 *) + pd->port_rcvhdrtail_kvaddr)); +} + +static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r) +{ + return (dd->ipath_flags & IPATH_INTREG_64) ? + ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r); +} + +/* + * from contents of IBCStatus (or a saved copy), return linkstate + * Report ACTIVE_DEFER as ACTIVE, because we treat them the same + * everywhere, anyway (and should be, for almost all purposes). + */ +static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs) +{ + u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) & + INFINIPATH_IBCS_LINKSTATE_MASK; + if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER) + state = INFINIPATH_IBCS_L_STATE_ACTIVE; + return state; +} + +/* from contents of IBCStatus (or a saved copy), return linktrainingstate */ +static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs) +{ + return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & + dd->ibcs_lts_mask; +} + /* * sysfs interface. */ @@ -938,8 +1035,7 @@ struct device_driver; extern const char ib_ipath_version[]; -int ipath_driver_create_group(struct device_driver *); -void ipath_driver_remove_group(struct device_driver *); +extern struct attribute_group *ipath_driver_attr_groups[]; int ipath_device_create_group(struct device *, struct ipath_devdata *); void ipath_device_remove_group(struct device *, struct ipath_devdata *); diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index 85a4aefc6c03..8f32b17a5eed 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -128,9 +128,8 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, int ret; /* - * We use LKEY == zero to mean a physical kmalloc() address. - * This is a bit of a hack since we rely on dma_map_single() - * being reversible by calling bus_to_virt(). + * We use LKEY == zero for kernel virtual addresses + * (see ipath_get_dma_mr and ipath_dma.c). */ if (sge->lkey == 0) { struct ipath_pd *pd = to_ipd(qp->ibqp.pd); diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 3d1432d1e3f4..d98d5f103700 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -934,6 +934,7 @@ static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp, struct ib_pma_portsamplescontrol *p = (struct ib_pma_portsamplescontrol *)pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; unsigned long flags; u8 port_select = p->port_select; @@ -955,7 +956,10 @@ static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp, p->counter_width = 4; /* 32 bit counters */ p->counter_mask0_9 = COUNTER_MASK0_9; spin_lock_irqsave(&dev->pending_lock, flags); - p->sample_status = dev->pma_sample_status; + if (crp->cr_psstat) + p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + p->sample_status = dev->pma_sample_status; p->sample_start = cpu_to_be32(dev->pma_sample_start); p->sample_interval = cpu_to_be32(dev->pma_sample_interval); p->tag = cpu_to_be16(dev->pma_tag); @@ -975,8 +979,9 @@ static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp, struct ib_pma_portsamplescontrol *p = (struct ib_pma_portsamplescontrol *)pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; unsigned long flags; - u32 start; + u8 status; int ret; if (pmp->attr_mod != 0 || @@ -986,59 +991,67 @@ static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp, goto bail; } - start = be32_to_cpu(p->sample_start); - if (start != 0) { - spin_lock_irqsave(&dev->pending_lock, flags); - if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_DONE) { - dev->pma_sample_status = - IB_PMA_SAMPLE_STATUS_STARTED; - dev->pma_sample_start = start; - dev->pma_sample_interval = - be32_to_cpu(p->sample_interval); - dev->pma_tag = be16_to_cpu(p->tag); - if (p->counter_select[0]) - dev->pma_counter_select[0] = - p->counter_select[0]; - if (p->counter_select[1]) - dev->pma_counter_select[1] = - p->counter_select[1]; - if (p->counter_select[2]) - dev->pma_counter_select[2] = - p->counter_select[2]; - if (p->counter_select[3]) - dev->pma_counter_select[3] = - p->counter_select[3]; - if (p->counter_select[4]) - dev->pma_counter_select[4] = - p->counter_select[4]; - } - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_lock_irqsave(&dev->pending_lock, flags); + if (crp->cr_psstat) + status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + status = dev->pma_sample_status; + if (status == IB_PMA_SAMPLE_STATUS_DONE) { + dev->pma_sample_start = be32_to_cpu(p->sample_start); + dev->pma_sample_interval = be32_to_cpu(p->sample_interval); + dev->pma_tag = be16_to_cpu(p->tag); + dev->pma_counter_select[0] = p->counter_select[0]; + dev->pma_counter_select[1] = p->counter_select[1]; + dev->pma_counter_select[2] = p->counter_select[2]; + dev->pma_counter_select[3] = p->counter_select[3]; + dev->pma_counter_select[4] = p->counter_select[4]; + if (crp->cr_psstat) { + ipath_write_creg(dev->dd, crp->cr_psinterval, + dev->pma_sample_interval); + ipath_write_creg(dev->dd, crp->cr_psstart, + dev->pma_sample_start); + } else + dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED; } + spin_unlock_irqrestore(&dev->pending_lock, flags); + ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port); bail: return ret; } -static u64 get_counter(struct ipath_ibdev *dev, __be16 sel) +static u64 get_counter(struct ipath_ibdev *dev, + struct ipath_cregs const *crp, + __be16 sel) { u64 ret; switch (sel) { case IB_PMA_PORT_XMIT_DATA: - ret = dev->ipath_sword; + ret = (crp->cr_psxmitdatacount) ? + ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) : + dev->ipath_sword; break; case IB_PMA_PORT_RCV_DATA: - ret = dev->ipath_rword; + ret = (crp->cr_psrcvdatacount) ? + ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) : + dev->ipath_rword; break; case IB_PMA_PORT_XMIT_PKTS: - ret = dev->ipath_spkts; + ret = (crp->cr_psxmitpktscount) ? + ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) : + dev->ipath_spkts; break; case IB_PMA_PORT_RCV_PKTS: - ret = dev->ipath_rpkts; + ret = (crp->cr_psrcvpktscount) ? + ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) : + dev->ipath_rpkts; break; case IB_PMA_PORT_XMIT_WAIT: - ret = dev->ipath_xmit_wait; + ret = (crp->cr_psxmitwaitcount) ? + ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) : + dev->ipath_xmit_wait; break; default: ret = 0; @@ -1053,14 +1066,21 @@ static int recv_pma_get_portsamplesresult(struct ib_perf *pmp, struct ib_pma_portsamplesresult *p = (struct ib_pma_portsamplesresult *)pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; + u8 status; int i; memset(pmp->data, 0, sizeof(pmp->data)); p->tag = cpu_to_be16(dev->pma_tag); - p->sample_status = cpu_to_be16(dev->pma_sample_status); + if (crp->cr_psstat) + status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + status = dev->pma_sample_status; + p->sample_status = cpu_to_be16(status); for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) - p->counter[i] = cpu_to_be32( - get_counter(dev, dev->pma_counter_select[i])); + p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 : + cpu_to_be32( + get_counter(dev, crp, dev->pma_counter_select[i])); return reply((struct ib_smp *) pmp); } @@ -1071,16 +1091,23 @@ static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp, struct ib_pma_portsamplesresult_ext *p = (struct ib_pma_portsamplesresult_ext *)pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; + u8 status; int i; memset(pmp->data, 0, sizeof(pmp->data)); p->tag = cpu_to_be16(dev->pma_tag); - p->sample_status = cpu_to_be16(dev->pma_sample_status); + if (crp->cr_psstat) + status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + status = dev->pma_sample_status; + p->sample_status = cpu_to_be16(status); /* 64 bits */ p->extended_width = __constant_cpu_to_be32(0x80000000); for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) - p->counter[i] = cpu_to_be64( - get_counter(dev, dev->pma_counter_select[i])); + p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 : + cpu_to_be64( + get_counter(dev, crp, dev->pma_counter_select[i])); return reply((struct ib_smp *) pmp); } @@ -1113,6 +1140,8 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, dev->z_local_link_integrity_errors; cntrs.excessive_buffer_overrun_errors -= dev->z_excessive_buffer_overrun_errors; + cntrs.vl15_dropped -= dev->z_vl15_dropped; + cntrs.vl15_dropped += dev->n_vl15_dropped; memset(pmp->data, 0, sizeof(pmp->data)); @@ -1156,10 +1185,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, cntrs.excessive_buffer_overrun_errors = 0xFUL; p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) | cntrs.excessive_buffer_overrun_errors; - if (dev->n_vl15_dropped > 0xFFFFUL) + if (cntrs.vl15_dropped > 0xFFFFUL) p->vl15_dropped = __constant_cpu_to_be16(0xFFFF); else - p->vl15_dropped = cpu_to_be16((u16)dev->n_vl15_dropped); + p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped); if (cntrs.port_xmit_data > 0xFFFFFFFFUL) p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF); else @@ -1262,8 +1291,10 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp, dev->z_excessive_buffer_overrun_errors = cntrs.excessive_buffer_overrun_errors; - if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) + if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) { dev->n_vl15_dropped = 0; + dev->z_vl15_dropped = cntrs.vl15_dropped; + } if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA) dev->z_port_xmit_data = cntrs.port_xmit_data; @@ -1434,7 +1465,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, * before checking for other consumers. * Just tell the caller to process it normally. */ - ret = IB_MAD_RESULT_FAILURE; + ret = IB_MAD_RESULT_SUCCESS; goto bail; default: smp->status |= IB_SMP_UNSUP_METHOD; @@ -1516,7 +1547,7 @@ static int process_perf(struct ib_device *ibdev, u8 port_num, * before checking for other consumers. * Just tell the caller to process it normally. */ - ret = IB_MAD_RESULT_FAILURE; + ret = IB_MAD_RESULT_SUCCESS; goto bail; default: pmp->status |= IB_SMP_UNSUP_METHOD; diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 6a41fdbc8e57..80dc623cee40 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -387,8 +387,8 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) struct ib_wc wc; int ret = 0; - ipath_dbg("QP%d/%d in error state\n", - qp->ibqp.qp_num, qp->remote_qpn); + ipath_dbg("QP%d/%d in error state (%d)\n", + qp->ibqp.qp_num, qp->remote_qpn, err); spin_lock(&dev->pending_lock); /* XXX What if its already removed by the timeout code? */ @@ -835,7 +835,8 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, init_attr->qp_type); if (err) { ret = ERR_PTR(err); - goto bail_rwq; + vfree(qp->r_rq.wq); + goto bail_qp; } qp->ip = NULL; ipath_reset_qp(qp); @@ -854,8 +855,6 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, * See ipath_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { - int err; - if (!qp->r_rq.wq) { __u64 offset = 0; @@ -863,7 +862,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, sizeof(offset)); if (err) { ret = ERR_PTR(err); - goto bail_rwq; + goto bail_ip; } } else { u32 s = sizeof(struct ipath_rwq) + @@ -875,7 +874,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->r_rq.wq); if (!qp->ip) { ret = ERR_PTR(-ENOMEM); - goto bail_rwq; + goto bail_ip; } err = ib_copy_to_udata(udata, &(qp->ip->offset), @@ -907,9 +906,11 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, goto bail; bail_ip: - kfree(qp->ip); -bail_rwq: - vfree(qp->r_rq.wq); + if (qp->ip) + kref_put(&qp->ip->ref, ipath_release_mmap_info); + else + vfree(qp->r_rq.wq); + ipath_free_qp(&dev->qp_table, qp); bail_qp: kfree(qp); bail_swq: diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 120a61b03bc4..459e46e2c016 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -647,6 +647,7 @@ static void send_rc_ack(struct ipath_qp *qp) queue_ack: spin_lock_irqsave(&qp->s_lock, flags); + dev->n_rc_qacks++; qp->s_flags |= IPATH_S_ACK_PENDING; qp->s_nak_state = qp->r_nak_state; qp->s_ack_psn = qp->r_ack_psn; @@ -798,11 +799,13 @@ bail: static inline void update_last_psn(struct ipath_qp *qp, u32 psn) { - if (qp->s_wait_credit) { - qp->s_wait_credit = 0; - tasklet_hi_schedule(&qp->s_task); + if (qp->s_last_psn != psn) { + qp->s_last_psn = psn; + if (qp->s_wait_credit) { + qp->s_wait_credit = 0; + tasklet_hi_schedule(&qp->s_task); + } } - qp->s_last_psn = psn; } /** @@ -1653,13 +1656,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_FIRST): if (!ipath_get_rwqe(qp, 0)) { rnr_nak: - /* - * A RNR NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if a RDMA read or atomic - * is pending though. - */ - if (qp->r_nak_state) - goto done; qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; qp->r_ack_psn = qp->r_psn; goto send_ack; diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index 708eba3165d7..6d2a17f9c1da 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -82,8 +82,7 @@ /* kr_rcvctrl bits */ #define INFINIPATH_R_PORTENABLE_SHIFT 0 -#define INFINIPATH_R_INTRAVAIL_SHIFT 16 -#define INFINIPATH_R_TAILUPD 0x80000000 +#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38) /* kr_intstatus, kr_intclear, kr_intmask bits */ #define INFINIPATH_I_RCVURG_SHIFT 0 @@ -272,20 +271,6 @@ #define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL #define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL -/* kr_mdio bits */ -#define INFINIPATH_MDIO_CLKDIV_MASK 0x7FULL -#define INFINIPATH_MDIO_CLKDIV_SHIFT 32 -#define INFINIPATH_MDIO_COMMAND_MASK 0x7ULL -#define INFINIPATH_MDIO_COMMAND_SHIFT 26 -#define INFINIPATH_MDIO_DEVADDR_MASK 0x1FULL -#define INFINIPATH_MDIO_DEVADDR_SHIFT 21 -#define INFINIPATH_MDIO_REGADDR_MASK 0x1FULL -#define INFINIPATH_MDIO_REGADDR_SHIFT 16 -#define INFINIPATH_MDIO_DATA_MASK 0xFFFFULL -#define INFINIPATH_MDIO_DATA_SHIFT 0 -#define INFINIPATH_MDIO_CMDVALID 0x0000000040000000ULL -#define INFINIPATH_MDIO_RDDATAVALID 0x0000000080000000ULL - /* kr_partitionkey bits */ #define INFINIPATH_PKEY_SIZE 16 #define INFINIPATH_PKEY_MASK 0xFFFF @@ -303,8 +288,6 @@ /* kr_xgxsconfig bits */ #define INFINIPATH_XGXS_RESET 0x7ULL -#define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL -#define INFINIPATH_XGXS_MDIOADDR_SHIFT 4 #define INFINIPATH_XGXS_RX_POL_SHIFT 19 #define INFINIPATH_XGXS_RX_POL_MASK 0xfULL @@ -470,6 +453,20 @@ struct ipath_cregs { ipath_creg cr_unsupvlcnt; ipath_creg cr_wordrcvcnt; ipath_creg cr_wordsendcnt; + ipath_creg cr_vl15droppedpktcnt; + ipath_creg cr_rxotherlocalphyerrcnt; + ipath_creg cr_excessbufferovflcnt; + ipath_creg cr_locallinkintegrityerrcnt; + ipath_creg cr_rxvlerrcnt; + ipath_creg cr_rxdlidfltrcnt; + ipath_creg cr_psstat; + ipath_creg cr_psstart; + ipath_creg cr_psinterval; + ipath_creg cr_psrcvdatacount; + ipath_creg cr_psrcvpktscount; + ipath_creg cr_psxmitdatacount; + ipath_creg cr_psxmitpktscount; + ipath_creg cr_psxmitwaitcount; }; #endif /* _IPATH_REGISTERS_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 54c61a972de2..a59bdbd0ed87 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -98,11 +98,15 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) { qp->s_rnr_timeout -= nqp->s_rnr_timeout; l = l->next; - if (l->next == &dev->rnrwait) + if (l->next == &dev->rnrwait) { + nqp = NULL; break; + } nqp = list_entry(l->next, struct ipath_qp, timerwait); } + if (nqp) + nqp->s_rnr_timeout -= qp->s_rnr_timeout; list_add(&qp->timerwait, l); } spin_unlock_irqrestore(&dev->pending_lock, flags); @@ -479,9 +483,14 @@ done: static void want_buffer(struct ipath_devdata *dd) { - set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); } /** diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 40c36ec19016..f772102e4713 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -59,7 +59,7 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, if ((unsigned) wr->num_sge > srq->rq.max_sge) { *bad_wr = wr; - ret = -ENOMEM; + ret = -EINVAL; goto bail; } @@ -94,8 +94,8 @@ bail: /** * ipath_create_srq - create a shared receive queue * @ibpd: the protection domain of the SRQ to create - * @attr: the attributes of the SRQ - * @udata: not used by the InfiniPath verbs driver + * @srq_init_attr: the attributes of the SRQ + * @udata: data from libipathverbs when creating a user SRQ */ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, @@ -211,11 +211,11 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, struct ib_udata *udata) { struct ipath_srq *srq = to_isrq(ibsrq); + struct ipath_rwq *wq; int ret = 0; if (attr_mask & IB_SRQ_MAX_WR) { struct ipath_rwq *owq; - struct ipath_rwq *wq; struct ipath_rwqe *p; u32 sz, size, n, head, tail; @@ -236,27 +236,20 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto bail; } - /* - * Return the address of the RWQ as the offset to mmap. - * See ipath_mmap() for details. - */ + /* Check that we can write the offset to mmap. */ if (udata && udata->inlen >= sizeof(__u64)) { __u64 offset_addr; - __u64 offset = (__u64) wq; + __u64 offset = 0; ret = ib_copy_from_udata(&offset_addr, udata, sizeof(offset_addr)); - if (ret) { - vfree(wq); - goto bail; - } + if (ret) + goto bail_free; udata->outbuf = (void __user *) offset_addr; ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (ret) { - vfree(wq); - goto bail; - } + if (ret) + goto bail_free; } spin_lock_irq(&srq->rq.lock); @@ -277,10 +270,8 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, else n -= tail; if (size <= n) { - spin_unlock_irq(&srq->rq.lock); - vfree(wq); ret = -EINVAL; - goto bail; + goto bail_unlock; } n = 0; p = wq->wq; @@ -314,6 +305,18 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, u32 s = sizeof(struct ipath_rwq) + size * sz; ipath_update_mmap_info(dev, ip, s, wq); + + /* + * Return the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->inlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + spin_lock_irq(&dev->pending_lock); if (list_empty(&ip->pending_mmaps)) list_add(&ip->pending_mmaps, @@ -328,7 +331,12 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, srq->limit = attr->srq_limit; spin_unlock_irq(&srq->rq.lock); } + goto bail; +bail_unlock: + spin_unlock_irq(&srq->rq.lock); +bail_free: + vfree(wq); bail: return ret; } diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index f0271415cd5b..d2725cd11bdc 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -133,15 +133,16 @@ bail: static void ipath_qcheck(struct ipath_devdata *dd) { static u64 last_tot_hdrqfull; + struct ipath_portdata *pd = dd->ipath_pd[0]; size_t blen = 0; char buf[128]; *buf = 0; - if (dd->ipath_pd[0]->port_hdrqfull != dd->ipath_p0_hdrqfull) { + if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) { blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u", - dd->ipath_pd[0]->port_hdrqfull - + pd->port_hdrqfull - dd->ipath_p0_hdrqfull); - dd->ipath_p0_hdrqfull = dd->ipath_pd[0]->port_hdrqfull; + dd->ipath_p0_hdrqfull = pd->port_hdrqfull; } if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) { blen += snprintf(buf + blen, sizeof buf - blen, @@ -173,7 +174,7 @@ static void ipath_qcheck(struct ipath_devdata *dd) if (blen) ipath_dbg("%s\n", buf); - if (dd->ipath_port0head != (u32) + if (pd->port_head != (u32) le64_to_cpu(*dd->ipath_hdrqtailptr)) { if (dd->ipath_lastport0rcv_cnt == ipath_stats.sps_port0pkts) { @@ -181,7 +182,7 @@ static void ipath_qcheck(struct ipath_devdata *dd) "port0 hd=%llx tl=%x; port0pkts %llx\n", (unsigned long long) le64_to_cpu(*dd->ipath_hdrqtailptr), - dd->ipath_port0head, + pd->port_head, (unsigned long long) ipath_stats.sps_port0pkts); } @@ -237,7 +238,7 @@ static void ipath_chk_errormask(struct ipath_devdata *dd) void ipath_get_faststats(unsigned long opaque) { struct ipath_devdata *dd = (struct ipath_devdata *) opaque; - u32 val; + int i; static unsigned cnt; unsigned long flags; u64 traffic_wds; @@ -321,12 +322,11 @@ void ipath_get_faststats(unsigned long opaque) /* limit qfull messages to ~one per minute per port */ if ((++cnt & 0x10)) { - for (val = dd->ipath_cfgports - 1; ((int)val) >= 0; - val--) { - if (dd->ipath_lastegrheads[val] != -1) - dd->ipath_lastegrheads[val] = -1; - if (dd->ipath_lastrcvhdrqtails[val] != -1) - dd->ipath_lastrcvhdrqtails[val] = -1; + for (i = (int) dd->ipath_cfgports; --i >= 0; ) { + struct ipath_portdata *pd = dd->ipath_pd[i]; + + if (pd && pd->port_lastrcvhdrqtail != -1) + pd->port_lastrcvhdrqtail = -1; } } diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index e1ad7cfc21fd..56dfc8a2344c 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -363,6 +363,60 @@ static ssize_t show_unit(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit); } +static ssize_t show_jint_max_packets(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets); +} + +static ssize_t store_jint_max_packets(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + u16 v = 0; + int ret; + + ret = ipath_parse_ushort(buf, &v); + if (ret < 0) + ipath_dev_err(dd, "invalid jint_max_packets.\n"); + else + dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v); + + return ret; +} + +static ssize_t show_jint_idle_ticks(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks); +} + +static ssize_t store_jint_idle_ticks(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + u16 v = 0; + int ret; + + ret = ipath_parse_ushort(buf, &v); + if (ret < 0) + ipath_dev_err(dd, "invalid jint_idle_ticks.\n"); + else + dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets); + + return ret; +} + #define DEVICE_COUNTER(name, attr) \ static ssize_t show_counter_##name(struct device *dev, \ struct device_attribute *attr, \ @@ -670,6 +724,257 @@ static ssize_t show_logged_errs(struct device *dev, return count; } +/* + * New sysfs entries to control various IB config. These all turn into + * accesses via ipath_f_get/set_ib_cfg. + * + * Get/Set heartbeat enable. Or of 1=enabled, 2=auto + */ +static ssize_t show_hrtbt_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_hrtbt_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && val > 3) + ret = -EINVAL; + if (ret < 0) { + ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + goto bail; + } + + /* + * Set the "intentional" heartbeat enable per either of + * "Enable" and "Auto", as these are normally set together. + * This bit is consulted when leaving loopback mode, + * because entering loopback mode overrides it and automatically + * disables heartbeat. + */ + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val); + if (r < 0) + ret = r; + else if (val == IPATH_IB_HRTBT_OFF) + dd->ipath_flags |= IPATH_NO_HRTBT; + else + dd->ipath_flags &= ~IPATH_NO_HRTBT; + +bail: + return ret; +} + +/* + * Get/Set Link-widths enabled. Or of 1=1x, 2=4x (this is human/IB centric, + * _not_ the particular encoding of any given chip) + */ +static ssize_t show_lwid_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_lwid_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && (val == 0 || val > 3)) + ret = -EINVAL; + if (ret < 0) { + ipath_dev_err(dd, + "attempt to set invalid Link Width (enable)\n"); + goto bail; + } + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val); + if (r < 0) + ret = r; + +bail: + return ret; +} + +/* Get current link width */ +static ssize_t show_lwid(struct device *dev, + struct device_attribute *attr, + char *buf) + +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +/* + * Get/Set Link-speeds enabled. Or of 1=SDR 2=DDR. + */ +static ssize_t show_spd_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_spd_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR))) + ret = -EINVAL; + if (ret < 0) { + ipath_dev_err(dd, + "attempt to set invalid Link Speed (enable)\n"); + goto bail; + } + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val); + if (r < 0) + ret = r; + +bail: + return ret; +} + +/* Get current link speed */ +static ssize_t show_spd(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +/* + * Get/Set RX polarity-invert enable. 0=no, 1=yes. + */ +static ssize_t show_rx_polinv_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_rx_polinv_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret < 0 || val > 1) + goto invalid; + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val); + if (r < 0) { + ret = r; + goto bail; + } + + goto bail; +invalid: + ipath_dev_err(dd, "attempt to set invalid Rx Polarity (enable)\n"); +bail: + return ret; +} +/* + * Get/Set RX lane-reversal enable. 0=no, 1=yes. + */ +static ssize_t show_lanerev_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_lanerev_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && val > 1) { + ret = -EINVAL; + ipath_dev_err(dd, + "attempt to set invalid Lane reversal (enable)\n"); + goto bail; + } + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val); + if (r < 0) + ret = r; + +bail: + return ret; +} + static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); @@ -683,6 +988,11 @@ static struct attribute_group driver_attr_group = { .attrs = driver_attributes }; +struct attribute_group *ipath_driver_attr_groups[] = { + &driver_attr_group, + NULL, +}; + static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid); static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc); static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid); @@ -701,6 +1011,10 @@ static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); +static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO, + show_jint_max_packets, store_jint_max_packets); +static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO, + show_jint_idle_ticks, store_jint_idle_ticks); static struct attribute *dev_attributes[] = { &dev_attr_guid.attr, @@ -727,6 +1041,34 @@ static struct attribute_group dev_attr_group = { .attrs = dev_attributes }; +static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb, + store_hrtbt_enb); +static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb, + store_lwid_enb); +static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL); +static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb, + store_spd_enb); +static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL); +static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb, + store_rx_polinv_enb); +static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb, + store_lanerev_enb); + +static struct attribute *dev_ibcfg_attributes[] = { + &dev_attr_hrtbt_enable.attr, + &dev_attr_link_width_enable.attr, + &dev_attr_link_width.attr, + &dev_attr_link_speed_enable.attr, + &dev_attr_link_speed.attr, + &dev_attr_rx_pol_inv_enable.attr, + &dev_attr_rx_lane_rev_enable.attr, + NULL +}; + +static struct attribute_group dev_ibcfg_attr_group = { + .attrs = dev_ibcfg_attributes +}; + /** * ipath_expose_reset - create a device reset file * @dev: the device structure @@ -753,24 +1095,9 @@ int ipath_expose_reset(struct device *dev) return ret; } -int ipath_driver_create_group(struct device_driver *drv) -{ - int ret; - - ret = sysfs_create_group(&drv->kobj, &driver_attr_group); - - return ret; -} - -void ipath_driver_remove_group(struct device_driver *drv) -{ - sysfs_remove_group(&drv->kobj, &driver_attr_group); -} - int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd) { int ret; - char unit[5]; ret = sysfs_create_group(&dev->kobj, &dev_attr_group); if (ret) @@ -780,11 +1107,26 @@ int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd) if (ret) goto bail_attrs; - snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit); - ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj, unit); - if (ret == 0) - goto bail; + if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { + ret = device_create_file(dev, &dev_attr_jint_idle_ticks); + if (ret) + goto bail_counter; + ret = device_create_file(dev, &dev_attr_jint_max_packets); + if (ret) + goto bail_idle; + ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group); + if (ret) + goto bail_max; + } + + return 0; + +bail_max: + device_remove_file(dev, &dev_attr_jint_max_packets); +bail_idle: + device_remove_file(dev, &dev_attr_jint_idle_ticks); +bail_counter: sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); bail_attrs: sysfs_remove_group(&dev->kobj, &dev_attr_group); @@ -794,12 +1136,14 @@ bail: void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd) { - char unit[5]; + sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); - snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit); - sysfs_remove_link(&dev->driver->kobj, unit); + if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { + sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group); + device_remove_file(dev, &dev_attr_jint_idle_ticks); + device_remove_file(dev, &dev_attr_jint_max_packets); + } - sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); sysfs_remove_group(&dev->kobj, &dev_attr_group); device_remove_file(dev, &dev_attr_reset); diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 16a2a938b520..de67eed08ed0 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -301,8 +301,6 @@ int ipath_make_ud_req(struct ipath_qp *qp) /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ qp->s_hdrwords = 7; - if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - qp->s_hdrwords++; qp->s_cur_size = wqe->length; qp->s_cur_sge = &qp->s_sge; qp->s_wqe = wqe; @@ -327,6 +325,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) ohdr = &qp->s_hdr.u.oth; } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + qp->s_hdrwords++; ohdr->u.ud.imm_data = wqe->wr.imm_data; bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; } else @@ -455,6 +454,28 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, } } + /* + * The opcode is in the low byte when its in network order + * (top byte when in host order). + */ + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + if (qp->ibqp.qp_num > 1 && + opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { + if (header_in_data) { + wc.imm_data = *(__be32 *) data; + data += sizeof(__be32); + } else + wc.imm_data = ohdr->u.ud.imm_data; + wc.wc_flags = IB_WC_WITH_IMM; + hdrsize += sizeof(u32); + } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { + wc.imm_data = 0; + wc.wc_flags = 0; + } else { + dev->n_pkt_drops++; + goto bail; + } + /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; if (unlikely(tlen < (hdrsize + pad + 4))) { @@ -482,28 +503,6 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.byte_len = tlen + sizeof(struct ib_grh); /* - * The opcode is in the low byte when its in network order - * (top byte when in host order). - */ - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - if (qp->ibqp.qp_num > 1 && - opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { - if (header_in_data) { - wc.imm_data = *(__be32 *) data; - data += sizeof(__be32); - } else - wc.imm_data = ohdr->u.ud.imm_data; - wc.wc_flags = IB_WC_WITH_IMM; - hdrsize += sizeof(u32); - } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { - wc.imm_data = 0; - wc.wc_flags = 0; - } else { - dev->n_pkt_drops++; - goto bail; - } - - /* * Get the next work request entry to find where to put the data. */ if (qp->r_reuse_sge) diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 74f77e7c2c1b..32d8f882e56c 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -302,8 +302,10 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) next = qp->s_head + 1; if (next >= qp->s_size) next = 0; - if (next == qp->s_last) - goto bail_inval; + if (next == qp->s_last) { + ret = -ENOMEM; + goto bail; + } wqe = get_swqe_ptr(qp, qp->s_head); wqe->wr = *wr; @@ -404,7 +406,7 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { *bad_wr = wr; - ret = -ENOMEM; + ret = -EINVAL; goto bail; } @@ -941,7 +943,7 @@ bail: * ipath_verbs_send - send a packet * @qp: the QP to send on * @hdr: the packet header - * @hdrwords: the number of words in the header + * @hdrwords: the number of 32-bit words in the header * @ss: the SGE to send * @len: the length of the packet in bytes */ @@ -953,7 +955,10 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, int ret; u32 dwords = (len + 3) >> 2; - /* +1 is for the qword padding of pbc */ + /* + * Calculate the send buffer trigger address. + * The +1 counts for the pbc control dword following the pbc length. + */ plen = hdrwords + dwords + 1; /* Drop non-VL15 packets if we are not in the active state */ @@ -1128,20 +1133,34 @@ static int ipath_query_device(struct ib_device *ibdev, return 0; } -const u8 ipath_cvt_physportstate[16] = { - [INFINIPATH_IBCS_LT_STATE_DISABLED] = 3, - [INFINIPATH_IBCS_LT_STATE_LINKUP] = 5, - [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = 2, - [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = 2, - [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = 1, - [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = 1, - [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = 4, - [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = 4, - [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = 4, - [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = 4, - [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = 6, - [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = 6, - [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6, +const u8 ipath_cvt_physportstate[32] = { + [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED, + [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP, + [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL, + [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL, + [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP, + [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP, + [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN }; u32 ipath_get_cr_errpkey(struct ipath_devdata *dd) @@ -1166,8 +1185,9 @@ static int ipath_query_port(struct ib_device *ibdev, ibcstat = dd->ipath_lastibcstat; props->state = ((ibcstat >> 4) & 0x3) + 1; /* See phys_state_show() */ - props->phys_state = ipath_cvt_physportstate[ - dd->ipath_lastibcstat & 0xf]; + props->phys_state = /* MEA: assumes shift == 0 */ + ipath_cvt_physportstate[dd->ipath_lastibcstat & + dd->ibcs_lts_mask]; props->port_cap_flags = dev->port_cap_flags; props->gid_tbl_len = 1; props->max_msg_sz = 0x80000000; @@ -1639,6 +1659,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) cntrs.local_link_integrity_errors; idev->z_excessive_buffer_overrun_errors = cntrs.excessive_buffer_overrun_errors; + idev->z_vl15_dropped = cntrs.vl15_dropped; /* * The system image GUID is supposed to be the same for all diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 6ccb54f104a3..3d59736b49b2 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -554,6 +554,7 @@ struct ipath_ibdev { u32 z_pkey_violations; /* starting count for PMA */ u32 z_local_link_integrity_errors; /* starting count for PMA */ u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */ + u32 z_vl15_dropped; /* starting count for PMA */ u32 n_rc_resends; u32 n_rc_acks; u32 n_rc_qacks; @@ -598,6 +599,7 @@ struct ipath_verbs_counters { u64 port_rcv_packets; u32 local_link_integrity_errors; u32 excessive_buffer_overrun_errors; + u32 vl15_dropped; }; static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) @@ -830,7 +832,17 @@ unsigned ipath_get_pkey(struct ipath_devdata *, unsigned); extern const enum ib_wc_opcode ib_ipath_wc_opcode[]; +/* + * Below converts HCA-specific LinkTrainingState to IB PhysPortState + * values. + */ extern const u8 ipath_cvt_physportstate[]; +#define IB_PHYSPORTSTATE_SLEEP 1 +#define IB_PHYSPORTSTATE_POLL 2 +#define IB_PHYSPORTSTATE_DISABLED 3 +#define IB_PHYSPORTSTATE_CFG_TRAIN 4 +#define IB_PHYSPORTSTATE_LINKUP 5 +#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6 extern const int ib_ipath_state_ops[]; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 8bf44daf45ec..7950aa6e8184 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -313,6 +313,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_ib_srq *srq; int is_send; int is_error; + u32 g_mlpath_rqpn; u16 wqe_ctr; cqe = next_cqe_sw(cq); @@ -426,11 +427,11 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, wc->slid = be16_to_cpu(cqe->rlid); wc->sl = cqe->sl >> 4; - wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff; - wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f; - wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ? - IB_WC_GRH : 0; - wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) >> 16; + g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); + wc->src_qp = g_mlpath_rqpn & 0xffffff; + wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; + wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; + wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; } return 0; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d8287d9db41e..96a39b5c9254 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -52,7 +52,7 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); -static const char mlx4_ib_version[] __devinitdata = +static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -468,6 +468,7 @@ static int init_node_data(struct mlx4_ib_dev *dev) if (err) goto out; + dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); out: @@ -516,9 +517,16 @@ static struct class_device_attribute *mlx4_class_attributes[] = { static void *mlx4_ib_add(struct mlx4_dev *dev) { + static int mlx4_ib_version_printed; struct mlx4_ib_dev *ibdev; int i; + + if (!mlx4_ib_version_printed) { + printk(KERN_INFO "%s", mlx4_ib_version); + ++mlx4_ib_version_printed; + } + ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); if (!ibdev) { dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 6966f943f440..09a30dd12b14 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1255,9 +1255,14 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev, if (err) goto out; - MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET); - MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET); - MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET); + if (!mthca_is_memfree(dev)) { + MTHCA_GET(adapter->vendor_id, outbox, + QUERY_ADAPTER_VENDOR_ID_OFFSET); + MTHCA_GET(adapter->device_id, outbox, + QUERY_ADAPTER_DEVICE_ID_OFFSET); + MTHCA_GET(adapter->revision_id, outbox, + QUERY_ADAPTER_REVISION_ID_OFFSET); + } MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4, diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 15aa32eb78b6..7bbdd1f4e6c7 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -60,13 +60,12 @@ enum { MTHCA_FLAG_DDR_HIDDEN = 1 << 1, MTHCA_FLAG_SRQ = 1 << 2, - MTHCA_FLAG_MSI = 1 << 3, - MTHCA_FLAG_MSI_X = 1 << 4, - MTHCA_FLAG_NO_LAM = 1 << 5, - MTHCA_FLAG_FMR = 1 << 6, - MTHCA_FLAG_MEMFREE = 1 << 7, - MTHCA_FLAG_PCIE = 1 << 8, - MTHCA_FLAG_SINAI_OPT = 1 << 9 + MTHCA_FLAG_MSI_X = 1 << 3, + MTHCA_FLAG_NO_LAM = 1 << 4, + MTHCA_FLAG_FMR = 1 << 5, + MTHCA_FLAG_MEMFREE = 1 << 6, + MTHCA_FLAG_PCIE = 1 << 7, + MTHCA_FLAG_SINAI_OPT = 1 << 8 }; enum { diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index b29de51b7f35..b60eb5df96e8 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -827,8 +827,7 @@ int mthca_init_eq_table(struct mthca_dev *dev) if (err) goto err_out_free; - if (dev->mthca_flags & MTHCA_FLAG_MSI || - dev->mthca_flags & MTHCA_FLAG_MSI_X) { + if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { dev->eq_table.clr_mask = 0; } else { dev->eq_table.clr_mask = @@ -839,8 +838,7 @@ int mthca_init_eq_table(struct mthca_dev *dev) dev->eq_table.arm_mask = 0; - intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ? - 128 : dev->eq_table.inta_pin; + intr = dev->eq_table.inta_pin; err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE, (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr, diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 60de6f93869e..cd3d8adbef9f 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -65,14 +65,9 @@ static int msi_x = 1; module_param(msi_x, int, 0444); MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); -static int msi = 0; -module_param(msi, int, 0444); -MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero (deprecated, use MSI-X instead)"); - #else /* CONFIG_PCI_MSI */ #define msi_x (0) -#define msi (0) #endif /* CONFIG_PCI_MSI */ @@ -131,7 +126,7 @@ module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444); MODULE_PARM_DESC(fmr_reserved_mtts, "number of memory translation table segments reserved for FMR"); -static const char mthca_version[] __devinitdata = +static char mthca_version[] __devinitdata = DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -740,7 +735,8 @@ static int mthca_init_hca(struct mthca_dev *mdev) } mdev->eq_table.inta_pin = adapter.inta_pin; - mdev->rev_id = adapter.revision_id; + if (!mthca_is_memfree(mdev)) + mdev->rev_id = adapter.revision_id; memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id); return 0; @@ -816,13 +812,11 @@ static int mthca_setup_hca(struct mthca_dev *dev) err = mthca_NOP(dev, &status); if (err || status) { - if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X)) { + if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { mthca_warn(dev, "NOP command failed to generate interrupt " "(IRQ %d).\n", - dev->mthca_flags & MTHCA_FLAG_MSI_X ? - dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector : - dev->pdev->irq); - mthca_warn(dev, "Trying again with MSI/MSI-X disabled.\n"); + dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector); + mthca_warn(dev, "Trying again with MSI-X disabled.\n"); } else { mthca_err(dev, "NOP command failed to generate interrupt " "(IRQ %d), aborting.\n", @@ -1005,7 +999,7 @@ static struct { .flags = 0 }, [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200), .flags = MTHCA_FLAG_PCIE }, - [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 2, 0), + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 3, 0), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE }, [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0), @@ -1128,29 +1122,12 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) if (msi_x && !mthca_enable_msi_x(mdev)) mdev->mthca_flags |= MTHCA_FLAG_MSI_X; - else if (msi) { - static int warned; - - if (!warned) { - printk(KERN_WARNING PFX "WARNING: MSI support will be " - "removed from the ib_mthca driver in January 2008.\n"); - printk(KERN_WARNING " If you are using MSI and cannot " - "switch to MSI-X, please tell " - "<general@lists.openfabrics.org>.\n"); - ++warned; - } - - if (!pci_enable_msi(pdev)) - mdev->mthca_flags |= MTHCA_FLAG_MSI; - } err = mthca_setup_hca(mdev); - if (err == -EBUSY && (mdev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))) { + if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) { if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); - if (mdev->mthca_flags & MTHCA_FLAG_MSI) - pci_disable_msi(pdev); - mdev->mthca_flags &= ~(MTHCA_FLAG_MSI_X | MTHCA_FLAG_MSI); + mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X; err = mthca_setup_hca(mdev); } @@ -1192,8 +1169,6 @@ err_cleanup: err_close: if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); - if (mdev->mthca_flags & MTHCA_FLAG_MSI) - pci_disable_msi(pdev); mthca_close_hca(mdev); @@ -1246,8 +1221,6 @@ static void __mthca_remove_one(struct pci_dev *pdev) if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); - if (mdev->mthca_flags & MTHCA_FLAG_MSI) - pci_disable_msi(pdev); ib_dealloc_device(&mdev->ib_dev); mthca_release_regions(pdev, mdev->mthca_flags & diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index aa6c70a6a36f..3b6985557cb2 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -613,8 +613,10 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, sizeof *(mr->mem.tavor.mpt) * idx; mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy); - if (IS_ERR(mr->mtt)) + if (IS_ERR(mr->mtt)) { + err = PTR_ERR(mr->mtt); goto err_out_table; + } mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE; @@ -627,8 +629,10 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); - if (IS_ERR(mailbox)) + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); goto err_out_free_mtt; + } mpt_entry = mailbox->buf; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 6bcde1cb9688..9e491df6419c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -923,17 +923,13 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, struct mthca_mr *mr; u64 *page_list; u64 total_size; - u64 mask; + unsigned long mask; int shift; int npages; int err; int i, j, n; - /* First check that we have enough alignment */ - if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) - return ERR_PTR(-EINVAL); - - mask = 0; + mask = buffer_list[0].addr ^ *iova_start; total_size = 0; for (i = 0; i < num_phys_buf; ++i) { if (i != 0) @@ -947,17 +943,7 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, if (mask & ~PAGE_MASK) return ERR_PTR(-EINVAL); - /* Find largest page shift we can use to cover buffers */ - for (shift = PAGE_SHIFT; shift < 31; ++shift) - if (num_phys_buf > 1) { - if ((1ULL << shift) & mask) - break; - } else { - if (1ULL << shift >= - buffer_list[0].size + - (buffer_list[0].addr & ((1ULL << shift) - 1))) - break; - } + shift = __ffs(mask | 1 << 31); buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1); buffer_list[0].addr &= ~0ull << shift; @@ -1270,6 +1256,8 @@ static int mthca_init_node_data(struct mthca_dev *dev) goto out; } + if (mthca_is_memfree(dev)) + dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); out: diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 0e5461c65731..db5595bbf7f0 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1175,6 +1175,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, { int ret; int i; + struct mthca_next_seg *next; qp->refcount = 1; init_waitqueue_head(&qp->wait); @@ -1217,7 +1218,6 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, } if (mthca_is_memfree(dev)) { - struct mthca_next_seg *next; struct mthca_data_seg *scatter; int size = (sizeof (struct mthca_next_seg) + qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16; @@ -1240,6 +1240,13 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, qp->sq.wqe_shift) + qp->send_wqe_offset); } + } else { + for (i = 0; i < qp->rq.max; ++i) { + next = get_recv_wqe(qp, i); + next->nda_op = htonl((((i + 1) % qp->rq.max) << + qp->rq.wqe_shift) | 1); + } + } qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); @@ -1863,7 +1870,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, prev_wqe = qp->rq.last; qp->rq.last = wqe; - ((struct mthca_next_seg *) wqe)->nda_op = 0; ((struct mthca_next_seg *) wqe)->ee_nds = cpu_to_be32(MTHCA_NEXT_DBD); ((struct mthca_next_seg *) wqe)->flags = 0; @@ -1885,9 +1891,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, qp->wrid[ind] = wr->wr_id; - ((struct mthca_next_seg *) prev_wqe)->nda_op = - cpu_to_be32((ind << qp->rq.wqe_shift) | 1); - wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = cpu_to_be32(MTHCA_NEXT_DBD | size); diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 553d681f6813..a5ffff6e1026 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -175,9 +175,17 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, * scatter list L_Keys to the sentry value of 0x100. */ for (i = 0; i < srq->max; ++i) { - wqe = get_wqe(srq, i); + struct mthca_next_seg *next; - *wqe_to_link(wqe) = i < srq->max - 1 ? i + 1 : -1; + next = wqe = get_wqe(srq, i); + + if (i < srq->max - 1) { + *wqe_to_link(wqe) = i + 1; + next->nda_op = htonl(((i + 1) << srq->wqe_shift) | 1); + } else { + *wqe_to_link(wqe) = -1; + next->nda_op = 0; + } for (scatter = wqe + sizeof (struct mthca_next_seg); (void *) scatter < wqe + (1 << srq->wqe_shift); @@ -470,16 +478,15 @@ out: void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr) { int ind; + struct mthca_next_seg *last_free; ind = wqe_addr >> srq->wqe_shift; spin_lock(&srq->lock); - if (likely(srq->first_free >= 0)) - *wqe_to_link(get_wqe(srq, srq->last_free)) = ind; - else - srq->first_free = ind; - + last_free = get_wqe(srq, srq->last_free); + *wqe_to_link(last_free) = ind; + last_free->nda_op = htonl((ind << srq->wqe_shift) | 1); *wqe_to_link(get_wqe(srq, ind)) = -1; srq->last_free = ind; @@ -506,15 +513,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, first_ind = srq->first_free; for (nreq = 0; wr; wr = wr->next) { - ind = srq->first_free; - - if (unlikely(ind < 0)) { - mthca_err(dev, "SRQ %06x full\n", srq->srqn); - err = -ENOMEM; - *bad_wr = wr; - break; - } - + ind = srq->first_free; wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); @@ -528,7 +527,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, prev_wqe = srq->last; srq->last = wqe; - ((struct mthca_next_seg *) wqe)->nda_op = 0; ((struct mthca_next_seg *) wqe)->ee_nds = 0; /* flags field will always remain 0 */ @@ -549,9 +547,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, if (i < srq->max_gs) mthca_set_data_seg_inval(wqe); - ((struct mthca_next_seg *) prev_wqe)->nda_op = - cpu_to_be32((ind << srq->wqe_shift) | 1); - wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = cpu_to_be32(MTHCA_NEXT_DBD); @@ -614,15 +609,7 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, spin_lock_irqsave(&srq->lock, flags); for (nreq = 0; wr; ++nreq, wr = wr->next) { - ind = srq->first_free; - - if (unlikely(ind < 0)) { - mthca_err(dev, "SRQ %06x full\n", srq->srqn); - err = -ENOMEM; - *bad_wr = wr; - break; - } - + ind = srq->first_free; wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); @@ -633,8 +620,6 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, break; } - ((struct mthca_next_seg *) wqe)->nda_op = - cpu_to_be32((next_ind << srq->wqe_shift) | 1); ((struct mthca_next_seg *) wqe)->ee_nds = 0; /* flags field will always remain 0 */ diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig new file mode 100644 index 000000000000..2aeb7ac972a9 --- /dev/null +++ b/drivers/infiniband/hw/nes/Kconfig @@ -0,0 +1,16 @@ +config INFINIBAND_NES + tristate "NetEffect RNIC Driver" + depends on PCI && INET && INFINIBAND + select LIBCRC32C + ---help--- + This is a low-level driver for NetEffect RDMA enabled + Network Interface Cards (RNIC). + +config INFINIBAND_NES_DEBUG + bool "Verbose debugging output" + depends on INFINIBAND_NES + default n + ---help--- + This option causes the NetEffect RNIC driver to produce debug + messages. Select this if you are developing the driver + or trying to diagnose a problem. diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile new file mode 100644 index 000000000000..35148513c47e --- /dev/null +++ b/drivers/infiniband/hw/nes/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o + +iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c new file mode 100644 index 000000000000..7f8853b44ee1 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes.c @@ -0,0 +1,1152 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/mii.h> +#include <linux/if_vlan.h> +#include <linux/crc32.h> +#include <linux/in.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/if_arp.h> +#include <linux/highmem.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/byteorder.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_pack.h> +#include <rdma/iw_cm.h> + +#include "nes.h" + +#include <net/netevent.h> +#include <net/neighbour.h> +#include <linux/route.h> +#include <net/ip_fib.h> + +MODULE_AUTHOR("NetEffect"); +MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +int max_mtu = 9000; +int nics_per_function = 1; +int interrupt_mod_interval = 0; + + +/* Interoperability */ +int mpa_version = 1; +module_param(mpa_version, int, 0); +MODULE_PARM_DESC(mpa_version, "MPA version to be used int MPA Req/Resp (0 or 1)"); + +/* Interoperability */ +int disable_mpa_crc = 0; +module_param(disable_mpa_crc, int, 0); +MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC"); + +unsigned int send_first = 0; +module_param(send_first, int, 0); +MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection"); + + +unsigned int nes_drv_opt = 0; +module_param(nes_drv_opt, int, 0); +MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters"); + +unsigned int nes_debug_level = 0; +module_param_named(debug_level, nes_debug_level, uint, 0644); +MODULE_PARM_DESC(debug_level, "Enable debug output level"); + +LIST_HEAD(nes_adapter_list); +LIST_HEAD(nes_dev_list); + +atomic_t qps_destroyed; +atomic_t cqp_reqs_allocated; +atomic_t cqp_reqs_freed; +atomic_t cqp_reqs_dynallocated; +atomic_t cqp_reqs_dynfreed; +atomic_t cqp_reqs_queued; +atomic_t cqp_reqs_redriven; + +static void nes_print_macaddr(struct net_device *netdev); +static irqreturn_t nes_interrupt(int, void *); +static int __devinit nes_probe(struct pci_dev *, const struct pci_device_id *); +static void __devexit nes_remove(struct pci_dev *); +static int __init nes_init_module(void); +static void __exit nes_exit_module(void); +static unsigned int ee_flsh_adapter; +static unsigned int sysfs_nonidx_addr; +static unsigned int sysfs_idx_addr; + +static struct pci_device_id nes_pci_table[] = { + {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID}, + {0} +}; + +MODULE_DEVICE_TABLE(pci, nes_pci_table); + +static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *); +static int nes_net_event(struct notifier_block *, unsigned long, void *); +static int nes_notifiers_registered; + + +static struct notifier_block nes_inetaddr_notifier = { + .notifier_call = nes_inetaddr_event +}; + +static struct notifier_block nes_net_notifier = { + .notifier_call = nes_net_event +}; + + + + +/** + * nes_inetaddr_event + */ +static int nes_inetaddr_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct net_device *event_netdev = ifa->ifa_dev->dev; + struct nes_device *nesdev; + struct net_device *netdev; + struct nes_vnic *nesvnic; + unsigned int addr; + unsigned int mask; + + addr = ntohl(ifa->ifa_address); + mask = ntohl(ifa->ifa_mask); + nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %08X, netmask %08X.\n", + addr, mask); + list_for_each_entry(nesdev, &nes_dev_list, list) { + nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p. (%s)\n", + nesdev, nesdev->netdev[0]->name); + netdev = nesdev->netdev[0]; + nesvnic = netdev_priv(netdev); + if (netdev == event_netdev) { + if (nesvnic->rdma_enabled == 0) { + nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since" + " RDMA is not enabled.\n", + netdev->name); + return NOTIFY_OK; + } + /* we have ifa->ifa_address/mask here if we need it */ + switch (event) { + case NETDEV_DOWN: + nes_debug(NES_DBG_NETDEV, "event:DOWN\n"); + nes_write_indexed(nesdev, + NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), 0); + + nes_manage_arp_cache(netdev, netdev->dev_addr, + ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE); + nesvnic->local_ipaddr = 0; + return NOTIFY_OK; + break; + case NETDEV_UP: + nes_debug(NES_DBG_NETDEV, "event:UP\n"); + + if (nesvnic->local_ipaddr != 0) { + nes_debug(NES_DBG_NETDEV, "Interface already has local_ipaddr\n"); + return NOTIFY_OK; + } + /* Add the address to the IP table */ + nesvnic->local_ipaddr = ifa->ifa_address; + + nes_write_indexed(nesdev, + NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), + ntohl(ifa->ifa_address)); + nes_manage_arp_cache(netdev, netdev->dev_addr, + ntohl(nesvnic->local_ipaddr), NES_ARP_ADD); + return NOTIFY_OK; + break; + default: + break; + } + } + } + + return NOTIFY_DONE; +} + + +/** + * nes_net_event + */ +static int nes_net_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct neighbour *neigh = ptr; + struct nes_device *nesdev; + struct net_device *netdev; + struct nes_vnic *nesvnic; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + list_for_each_entry(nesdev, &nes_dev_list, list) { + /* nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p.\n", nesdev); */ + netdev = nesdev->netdev[0]; + nesvnic = netdev_priv(netdev); + if (netdev == neigh->dev) { + if (nesvnic->rdma_enabled == 0) { + nes_debug(NES_DBG_NETDEV, "Skipping device %s since no RDMA\n", + netdev->name); + } else { + if (neigh->nud_state & NUD_VALID) { + nes_manage_arp_cache(neigh->dev, neigh->ha, + ntohl(*(__be32 *)neigh->primary_key), NES_ARP_ADD); + } else { + nes_manage_arp_cache(neigh->dev, neigh->ha, + ntohl(*(__be32 *)neigh->primary_key), NES_ARP_DELETE); + } + } + return NOTIFY_OK; + } + } + break; + default: + nes_debug(NES_DBG_NETDEV, "NETEVENT_ %lu undefined\n", event); + break; + } + + return NOTIFY_DONE; +} + + +/** + * nes_add_ref + */ +void nes_add_ref(struct ib_qp *ibqp) +{ + struct nes_qp *nesqp; + + nesqp = to_nesqp(ibqp); + nes_debug(NES_DBG_QP, "Bumping refcount for QP%u. Pre-inc value = %u\n", + ibqp->qp_num, atomic_read(&nesqp->refcount)); + atomic_inc(&nesqp->refcount); +} + +static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request) +{ + unsigned long flags; + struct nes_qp *nesqp = cqp_request->cqp_callback_pointer; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 qp_id; + + atomic_inc(&qps_destroyed); + + /* Free the control structures */ + + qp_id = nesqp->hwqp.qp_id; + if (nesqp->pbl_vbase) { + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, + nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase); + nesqp->pbl_vbase = NULL; + + } else { + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, + nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase); + } + nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id); + + kfree(nesqp->allocated_buffer); + +} + +/** + * nes_rem_ref + */ +void nes_rem_ref(struct ib_qp *ibqp) +{ + u64 u64temp; + struct nes_qp *nesqp; + struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + u32 opcode; + + nesqp = to_nesqp(ibqp); + + if (atomic_read(&nesqp->refcount) == 0) { + printk(KERN_INFO PFX "%s: Reference count already 0 for QP%d, last aeq = 0x%04X.\n", + __FUNCTION__, ibqp->qp_num, nesqp->last_aeq); + BUG(); + } + + if (atomic_dec_and_test(&nesqp->refcount)) { + nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; + + /* Destroy the QP */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); + return; + } + cqp_request->waiting = 0; + cqp_request->callback = 1; + cqp_request->cqp_callback = nes_cqp_rem_ref_callback; + cqp_request->cqp_callback_pointer = nesqp; + cqp_wqe = &cqp_request->cqp_wqe; + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + opcode = NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_IWARP; + + if (nesqp->hte_added) { + opcode |= NES_CQP_QP_DEL_HTE; + nesqp->hte_added = 0; + } + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + u64temp = (u64)nesqp->nesqp_context_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + } +} + + +/** + * nes_get_qp + */ +struct ib_qp *nes_get_qp(struct ib_device *device, int qpn) +{ + struct nes_vnic *nesvnic = to_nesvnic(device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + + if ((qpn < NES_FIRST_QPN) || (qpn >= (NES_FIRST_QPN + nesadapter->max_qp))) + return NULL; + + return &nesadapter->qp_table[qpn - NES_FIRST_QPN]->ibqp; +} + + +/** + * nes_print_macaddr + */ +static void nes_print_macaddr(struct net_device *netdev) +{ + nes_debug(NES_DBG_INIT, "%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, IRQ %u\n", + netdev->name, + netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2], + netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5], + netdev->irq); +} + + +/** + * nes_interrupt - handle interrupts + */ +static irqreturn_t nes_interrupt(int irq, void *dev_id) +{ + struct nes_device *nesdev = (struct nes_device *)dev_id; + int handled = 0; + u32 int_mask; + u32 int_req; + u32 int_stat; + u32 intf_int_stat; + u32 timer_stat; + + if (nesdev->msi_enabled) { + /* No need to read the interrupt pending register if msi is enabled */ + handled = 1; + } else { + if (unlikely(nesdev->nesadapter->hw_rev == NE020_REV)) { + /* Master interrupt enable provides synchronization for kicking off bottom half + when interrupt sharing is going on */ + int_mask = nes_read32(nesdev->regs + NES_INT_MASK); + if (int_mask & 0x80000000) { + /* Check interrupt status to see if this might be ours */ + int_stat = nes_read32(nesdev->regs + NES_INT_STAT); + int_req = nesdev->int_req; + if (int_stat&int_req) { + /* if interesting CEQ or AEQ is pending, claim the interrupt */ + if ((int_stat&int_req) & (~(NES_INT_TIMER|NES_INT_INTF))) { + handled = 1; + } else { + if (((int_stat & int_req) & NES_INT_TIMER) == NES_INT_TIMER) { + /* Timer might be running but might be for another function */ + timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT); + if ((timer_stat & nesdev->timer_int_req) != 0) { + handled = 1; + } + } + if ((((int_stat & int_req) & NES_INT_INTF) == NES_INT_INTF) && + (handled == 0)) { + intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT); + if ((intf_int_stat & nesdev->intf_int_req) != 0) { + handled = 1; + } + } + } + if (handled) { + nes_write32(nesdev->regs+NES_INT_MASK, int_mask & (~0x80000000)); + int_mask = nes_read32(nesdev->regs+NES_INT_MASK); + /* Save off the status to save an additional read */ + nesdev->int_stat = int_stat; + nesdev->napi_isr_ran = 1; + } + } + } + } else { + handled = nes_read32(nesdev->regs+NES_INT_PENDING); + } + } + + if (handled) { + + if (nes_napi_isr(nesdev) == 0) { + tasklet_schedule(&nesdev->dpc_tasklet); + + } + return IRQ_HANDLED; + } else { + return IRQ_NONE; + } +} + + +/** + * nes_probe - Device initialization + */ +static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) +{ + struct net_device *netdev = NULL; + struct nes_device *nesdev = NULL; + int ret = 0; + struct nes_vnic *nesvnic = NULL; + void __iomem *mmio_regs = NULL; + u8 hw_rev; + + assert(pcidev != NULL); + assert(ent != NULL); + + printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n", + DRV_VERSION, pci_name(pcidev)); + + ret = pci_enable_device(pcidev); + if (ret) { + printk(KERN_ERR PFX "Unable to enable PCI device. (%s)\n", pci_name(pcidev)); + goto bail0; + } + + nes_debug(NES_DBG_INIT, "BAR0 (@0x%08lX) size = 0x%lX bytes\n", + (long unsigned int)pci_resource_start(pcidev, BAR_0), + (long unsigned int)pci_resource_len(pcidev, BAR_0)); + nes_debug(NES_DBG_INIT, "BAR1 (@0x%08lX) size = 0x%lX bytes\n", + (long unsigned int)pci_resource_start(pcidev, BAR_1), + (long unsigned int)pci_resource_len(pcidev, BAR_1)); + + /* Make sure PCI base addr are MMIO */ + if (!(pci_resource_flags(pcidev, BAR_0) & IORESOURCE_MEM) || + !(pci_resource_flags(pcidev, BAR_1) & IORESOURCE_MEM)) { + printk(KERN_ERR PFX "PCI regions not an MMIO resource\n"); + ret = -ENODEV; + goto bail1; + } + + /* Reserve PCI I/O and memory resources */ + ret = pci_request_regions(pcidev, DRV_NAME); + if (ret) { + printk(KERN_ERR PFX "Unable to request regions. (%s)\n", pci_name(pcidev)); + goto bail1; + } + + if ((sizeof(dma_addr_t) > 4)) { + ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK); + if (ret < 0) { + printk(KERN_ERR PFX "64b DMA mask configuration failed\n"); + goto bail2; + } + ret = pci_set_consistent_dma_mask(pcidev, DMA_64BIT_MASK); + if (ret) { + printk(KERN_ERR PFX "64b DMA consistent mask configuration failed\n"); + goto bail2; + } + } else { + ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK); + if (ret < 0) { + printk(KERN_ERR PFX "32b DMA mask configuration failed\n"); + goto bail2; + } + ret = pci_set_consistent_dma_mask(pcidev, DMA_32BIT_MASK); + if (ret) { + printk(KERN_ERR PFX "32b DMA consistent mask configuration failed\n"); + goto bail2; + } + } + + pci_set_master(pcidev); + + /* Allocate hardware structure */ + nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL); + if (!nesdev) { + printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", pci_name(pcidev)); + ret = -ENOMEM; + goto bail2; + } + + nes_debug(NES_DBG_INIT, "Allocated nes device at %p\n", nesdev); + nesdev->pcidev = pcidev; + pci_set_drvdata(pcidev, nesdev); + + pci_read_config_byte(pcidev, 0x0008, &hw_rev); + nes_debug(NES_DBG_INIT, "hw_rev=%u\n", hw_rev); + + spin_lock_init(&nesdev->indexed_regs_lock); + + /* Remap the PCI registers in adapter BAR0 to kernel VA space */ + mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), sizeof(mmio_regs)); + if (mmio_regs == NULL) { + printk(KERN_ERR PFX "Unable to remap BAR0\n"); + ret = -EIO; + goto bail3; + } + nesdev->regs = mmio_regs; + nesdev->index_reg = 0x50 + (PCI_FUNC(pcidev->devfn)*8) + mmio_regs; + + /* Ensure interrupts are disabled */ + nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff); + + if (nes_drv_opt & NES_DRV_OPT_ENABLE_MSI) { + if (!pci_enable_msi(nesdev->pcidev)) { + nesdev->msi_enabled = 1; + nes_debug(NES_DBG_INIT, "MSI is enabled for device %s\n", + pci_name(pcidev)); + } else { + nes_debug(NES_DBG_INIT, "MSI is disabled by linux for device %s\n", + pci_name(pcidev)); + } + } else { + nes_debug(NES_DBG_INIT, "MSI not requested due to driver options for device %s\n", + pci_name(pcidev)); + } + + nesdev->csr_start = pci_resource_start(nesdev->pcidev, BAR_0); + nesdev->doorbell_region = pci_resource_start(nesdev->pcidev, BAR_1); + + /* Init the adapter */ + nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev); + nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; + if (!nesdev->nesadapter) { + printk(KERN_ERR PFX "Unable to initialize adapter.\n"); + ret = -ENOMEM; + goto bail5; + } + + /* nesdev->base_doorbell_index = + nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */ + nesdev->base_doorbell_index = 1; + nesdev->doorbell_start = nesdev->nesadapter->doorbell_start; + nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % nesdev->nesadapter->port_count; + + tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev); + + /* bring up the Control QP */ + if (nes_init_cqp(nesdev)) { + ret = -ENODEV; + goto bail6; + } + + /* Arm the CCQ */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + PCI_FUNC(nesdev->pcidev->devfn)); + nes_read32(nesdev->regs+NES_CQE_ALLOC); + + /* Enable the interrupts */ + nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) | + (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16)); + if (PCI_FUNC(nesdev->pcidev->devfn) < 4) { + nesdev->int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+24)); + } + + /* TODO: This really should be the first driver to load, not function 0 */ + if (PCI_FUNC(nesdev->pcidev->devfn) == 0) { + /* pick up PCI and critical errors if the first driver to load */ + nesdev->intf_int_req = NES_INTF_INT_PCIERR | NES_INTF_INT_CRITERR; + nesdev->int_req |= NES_INT_INTF; + } else { + nesdev->intf_int_req = 0; + } + nesdev->intf_int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16)); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, 0); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 0); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS2, 0x00001265); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS4, 0x18021804); + + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS3, 0x17801790); + + /* deal with both periodic and one_shot */ + nesdev->timer_int_req = 0x101 << PCI_FUNC(nesdev->pcidev->devfn); + nesdev->nesadapter->timer_int_req |= nesdev->timer_int_req; + nes_debug(NES_DBG_INIT, "setting int_req for function %u, nesdev = 0x%04X, adapter = 0x%04X\n", + PCI_FUNC(nesdev->pcidev->devfn), + nesdev->timer_int_req, nesdev->nesadapter->timer_int_req); + + nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); + + list_add_tail(&nesdev->list, &nes_dev_list); + + /* Request an interrupt line for the driver */ + ret = request_irq(pcidev->irq, nes_interrupt, IRQF_SHARED, DRV_NAME, nesdev); + if (ret) { + printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n", + pci_name(pcidev), pcidev->irq); + goto bail65; + } + + nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + + if (nes_notifiers_registered == 0) { + register_inetaddr_notifier(&nes_inetaddr_notifier); + register_netevent_notifier(&nes_net_notifier); + } + nes_notifiers_registered++; + + /* Initialize network devices */ + if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL) { + goto bail7; + } + + /* Register network device */ + ret = register_netdev(netdev); + if (ret) { + printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret); + nes_netdev_destroy(netdev); + goto bail7; + } + + nes_print_macaddr(netdev); + /* create a CM core for this netdev */ + nesvnic = netdev_priv(netdev); + + nesdev->netdev_count++; + nesdev->nesadapter->netdev_count++; + + + printk(KERN_ERR PFX "%s: NetEffect RNIC driver successfully loaded.\n", + pci_name(pcidev)); + return 0; + + bail7: + printk(KERN_ERR PFX "bail7\n"); + while (nesdev->netdev_count > 0) { + nesdev->netdev_count--; + nesdev->nesadapter->netdev_count--; + + unregister_netdev(nesdev->netdev[nesdev->netdev_count]); + nes_netdev_destroy(nesdev->netdev[nesdev->netdev_count]); + } + + nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n", + nesdev->netdev_count, nesdev->nesadapter->netdev_count); + + nes_notifiers_registered--; + if (nes_notifiers_registered == 0) { + unregister_netevent_notifier(&nes_net_notifier); + unregister_inetaddr_notifier(&nes_inetaddr_notifier); + } + + list_del(&nesdev->list); + nes_destroy_cqp(nesdev); + + bail65: + printk(KERN_ERR PFX "bail65\n"); + free_irq(pcidev->irq, nesdev); + if (nesdev->msi_enabled) { + pci_disable_msi(pcidev); + } + bail6: + printk(KERN_ERR PFX "bail6\n"); + tasklet_kill(&nesdev->dpc_tasklet); + /* Deallocate the Adapter Structure */ + nes_destroy_adapter(nesdev->nesadapter); + + bail5: + printk(KERN_ERR PFX "bail5\n"); + iounmap(nesdev->regs); + + bail3: + printk(KERN_ERR PFX "bail3\n"); + kfree(nesdev); + + bail2: + pci_release_regions(pcidev); + + bail1: + pci_disable_device(pcidev); + + bail0: + return ret; +} + + +/** + * nes_remove - unload from kernel + */ +static void __devexit nes_remove(struct pci_dev *pcidev) +{ + struct nes_device *nesdev = pci_get_drvdata(pcidev); + struct net_device *netdev; + int netdev_index = 0; + + if (nesdev->netdev_count) { + netdev = nesdev->netdev[netdev_index]; + if (netdev) { + netif_stop_queue(netdev); + unregister_netdev(netdev); + nes_netdev_destroy(netdev); + + nesdev->netdev[netdev_index] = NULL; + nesdev->netdev_count--; + nesdev->nesadapter->netdev_count--; + } + } + + nes_notifiers_registered--; + if (nes_notifiers_registered == 0) { + unregister_netevent_notifier(&nes_net_notifier); + unregister_inetaddr_notifier(&nes_inetaddr_notifier); + } + + list_del(&nesdev->list); + nes_destroy_cqp(nesdev); + tasklet_kill(&nesdev->dpc_tasklet); + + /* Deallocate the Adapter Structure */ + nes_destroy_adapter(nesdev->nesadapter); + + free_irq(pcidev->irq, nesdev); + + if (nesdev->msi_enabled) { + pci_disable_msi(pcidev); + } + + iounmap(nesdev->regs); + kfree(nesdev); + + /* nes_debug(NES_DBG_SHUTDOWN, "calling pci_release_regions.\n"); */ + pci_release_regions(pcidev); + pci_disable_device(pcidev); + pci_set_drvdata(pcidev, NULL); +} + + +static struct pci_driver nes_pci_driver = { + .name = DRV_NAME, + .id_table = nes_pci_table, + .probe = nes_probe, + .remove = __devexit_p(nes_remove), +}; + +static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf) +{ + unsigned int devfn = 0xffffffff; + unsigned char bus_number = 0xff; + unsigned int i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + devfn = nesdev->nesadapter->devfn; + bus_number = nesdev->nesadapter->bus_number; + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "%x:%x", bus_number, devfn); +} + +static ssize_t nes_store_adapter(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + + ee_flsh_adapter = simple_strtoul(p, &p, 10); + return strnlen(buf, count); +} + +static ssize_t nes_show_ee_cmd(struct device_driver *ddp, char *buf) +{ + u32 eeprom_cmd = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + eeprom_cmd = nes_read32(nesdev->regs + NES_EEPROM_COMMAND); + break; + } + i++; + } + return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_cmd); +} + +static ssize_t nes_store_ee_cmd(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write32(nesdev->regs + NES_EEPROM_COMMAND, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + +static ssize_t nes_show_ee_data(struct device_driver *ddp, char *buf) +{ + u32 eeprom_data = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + eeprom_data = nes_read32(nesdev->regs + NES_EEPROM_DATA); + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_data); +} + +static ssize_t nes_store_ee_data(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write32(nesdev->regs + NES_EEPROM_DATA, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + +static ssize_t nes_show_flash_cmd(struct device_driver *ddp, char *buf) +{ + u32 flash_cmd = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + flash_cmd = nes_read32(nesdev->regs + NES_FLASH_COMMAND); + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_cmd); +} + +static ssize_t nes_store_flash_cmd(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write32(nesdev->regs + NES_FLASH_COMMAND, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + +static ssize_t nes_show_flash_data(struct device_driver *ddp, char *buf) +{ + u32 flash_data = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + flash_data = nes_read32(nesdev->regs + NES_FLASH_DATA); + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_data); +} + +static ssize_t nes_store_flash_data(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write32(nesdev->regs + NES_FLASH_DATA, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + +static ssize_t nes_show_nonidx_addr(struct device_driver *ddp, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_nonidx_addr); +} + +static ssize_t nes_store_nonidx_addr(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') + sysfs_nonidx_addr = simple_strtoul(p, &p, 16); + + return strnlen(buf, count); +} + +static ssize_t nes_show_nonidx_data(struct device_driver *ddp, char *buf) +{ + u32 nonidx_data = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nonidx_data = nes_read32(nesdev->regs + sysfs_nonidx_addr); + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%x\n", nonidx_data); +} + +static ssize_t nes_store_nonidx_data(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write32(nesdev->regs + sysfs_nonidx_addr, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + +static ssize_t nes_show_idx_addr(struct device_driver *ddp, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_idx_addr); +} + +static ssize_t nes_store_idx_addr(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') + sysfs_idx_addr = simple_strtoul(p, &p, 16); + + return strnlen(buf, count); +} + +static ssize_t nes_show_idx_data(struct device_driver *ddp, char *buf) +{ + u32 idx_data = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + idx_data = nes_read_indexed(nesdev, sysfs_idx_addr); + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%x\n", idx_data); +} + +static ssize_t nes_store_idx_data(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write_indexed(nesdev, sysfs_idx_addr, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + +static DRIVER_ATTR(adapter, S_IRUSR | S_IWUSR, + nes_show_adapter, nes_store_adapter); +static DRIVER_ATTR(eeprom_cmd, S_IRUSR | S_IWUSR, + nes_show_ee_cmd, nes_store_ee_cmd); +static DRIVER_ATTR(eeprom_data, S_IRUSR | S_IWUSR, + nes_show_ee_data, nes_store_ee_data); +static DRIVER_ATTR(flash_cmd, S_IRUSR | S_IWUSR, + nes_show_flash_cmd, nes_store_flash_cmd); +static DRIVER_ATTR(flash_data, S_IRUSR | S_IWUSR, + nes_show_flash_data, nes_store_flash_data); +static DRIVER_ATTR(nonidx_addr, S_IRUSR | S_IWUSR, + nes_show_nonidx_addr, nes_store_nonidx_addr); +static DRIVER_ATTR(nonidx_data, S_IRUSR | S_IWUSR, + nes_show_nonidx_data, nes_store_nonidx_data); +static DRIVER_ATTR(idx_addr, S_IRUSR | S_IWUSR, + nes_show_idx_addr, nes_store_idx_addr); +static DRIVER_ATTR(idx_data, S_IRUSR | S_IWUSR, + nes_show_idx_data, nes_store_idx_data); + +static int nes_create_driver_sysfs(struct pci_driver *drv) +{ + int error; + error = driver_create_file(&drv->driver, &driver_attr_adapter); + error |= driver_create_file(&drv->driver, &driver_attr_eeprom_cmd); + error |= driver_create_file(&drv->driver, &driver_attr_eeprom_data); + error |= driver_create_file(&drv->driver, &driver_attr_flash_cmd); + error |= driver_create_file(&drv->driver, &driver_attr_flash_data); + error |= driver_create_file(&drv->driver, &driver_attr_nonidx_addr); + error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data); + error |= driver_create_file(&drv->driver, &driver_attr_idx_addr); + error |= driver_create_file(&drv->driver, &driver_attr_idx_data); + return error; +} + +static void nes_remove_driver_sysfs(struct pci_driver *drv) +{ + driver_remove_file(&drv->driver, &driver_attr_adapter); + driver_remove_file(&drv->driver, &driver_attr_eeprom_cmd); + driver_remove_file(&drv->driver, &driver_attr_eeprom_data); + driver_remove_file(&drv->driver, &driver_attr_flash_cmd); + driver_remove_file(&drv->driver, &driver_attr_flash_data); + driver_remove_file(&drv->driver, &driver_attr_nonidx_addr); + driver_remove_file(&drv->driver, &driver_attr_nonidx_data); + driver_remove_file(&drv->driver, &driver_attr_idx_addr); + driver_remove_file(&drv->driver, &driver_attr_idx_data); +} + +/** + * nes_init_module - module initialization entry point + */ +static int __init nes_init_module(void) +{ + int retval; + int retval1; + + retval = nes_cm_start(); + if (retval) { + printk(KERN_ERR PFX "Unable to start NetEffect iWARP CM.\n"); + return retval; + } + retval = pci_register_driver(&nes_pci_driver); + if (retval >= 0) { + retval1 = nes_create_driver_sysfs(&nes_pci_driver); + if (retval1 < 0) + printk(KERN_ERR PFX "Unable to create NetEffect sys files.\n"); + } + return retval; +} + + +/** + * nes_exit_module - module unload entry point + */ +static void __exit nes_exit_module(void) +{ + nes_cm_stop(); + nes_remove_driver_sysfs(&nes_pci_driver); + + pci_unregister_driver(&nes_pci_driver); +} + + +module_init(nes_init_module); +module_exit(nes_exit_module); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h new file mode 100644 index 000000000000..fd57e8a1582f --- /dev/null +++ b/drivers/infiniband/hw/nes/nes.h @@ -0,0 +1,560 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NES_H +#define __NES_H + +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/slab.h> +#include <asm/semaphore.h> +#include <linux/version.h> +#include <asm/io.h> +#include <linux/crc32c.h> + +#include <rdma/ib_smi.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_pack.h> +#include <rdma/rdma_cm.h> +#include <rdma/iw_cm.h> + +#define NES_SEND_FIRST_WRITE + +#define QUEUE_DISCONNECTS + +#define DRV_BUILD "1" + +#define DRV_NAME "iw_nes" +#define DRV_VERSION "1.0 KO Build " DRV_BUILD +#define PFX DRV_NAME ": " + +/* + * NetEffect PCI vendor id and NE010 PCI device id. + */ +#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */ +#define PCI_VENDOR_ID_NETEFFECT 0x1678 +#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100 +#endif + +#define NE020_REV 4 +#define NE020_REV1 5 + +#define BAR_0 0 +#define BAR_1 2 + +#define RX_BUF_SIZE (1536 + 8) +#define NES_REG0_SIZE (4 * 1024) +#define NES_TX_TIMEOUT (6*HZ) +#define NES_FIRST_QPN 64 +#define NES_SW_CONTEXT_ALIGN 1024 + +#define NES_NIC_MAX_NICS 16 +#define NES_MAX_ARP_TABLE_SIZE 4096 + +#define NES_NIC_CEQ_SIZE 8 +/* NICs will be on a separate CQ */ +#define NES_CCEQ_SIZE ((nesadapter->max_cq / nesadapter->port_count) - 32) + +#define NES_MAX_PORT_COUNT 4 + +#define MAX_DPC_ITERATIONS 128 + +#define NES_CQP_REQUEST_NO_DOORBELL_RING 0 +#define NES_CQP_REQUEST_RING_DOORBELL 1 + +#define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001 +#define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002 +#define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004 +#define NES_DRV_OPT_DISABLE_INTF 0x00000008 +#define NES_DRV_OPT_ENABLE_MSI 0x00000010 +#define NES_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020 +#define NES_DRV_OPT_SUPRESS_OPTION_BC 0x00000040 +#define NES_DRV_OPT_NO_INLINE_DATA 0x00000080 +#define NES_DRV_OPT_DISABLE_INT_MOD 0x00000100 +#define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200 + +#define NES_AEQ_EVENT_TIMEOUT 2500 +#define NES_DISCONNECT_EVENT_TIMEOUT 2000 + +/* debug levels */ +/* must match userspace */ +#define NES_DBG_HW 0x00000001 +#define NES_DBG_INIT 0x00000002 +#define NES_DBG_ISR 0x00000004 +#define NES_DBG_PHY 0x00000008 +#define NES_DBG_NETDEV 0x00000010 +#define NES_DBG_CM 0x00000020 +#define NES_DBG_CM1 0x00000040 +#define NES_DBG_NIC_RX 0x00000080 +#define NES_DBG_NIC_TX 0x00000100 +#define NES_DBG_CQP 0x00000200 +#define NES_DBG_MMAP 0x00000400 +#define NES_DBG_MR 0x00000800 +#define NES_DBG_PD 0x00001000 +#define NES_DBG_CQ 0x00002000 +#define NES_DBG_QP 0x00004000 +#define NES_DBG_MOD_QP 0x00008000 +#define NES_DBG_AEQ 0x00010000 +#define NES_DBG_IW_RX 0x00020000 +#define NES_DBG_IW_TX 0x00040000 +#define NES_DBG_SHUTDOWN 0x00080000 +#define NES_DBG_RSVD1 0x10000000 +#define NES_DBG_RSVD2 0x20000000 +#define NES_DBG_RSVD3 0x40000000 +#define NES_DBG_RSVD4 0x80000000 +#define NES_DBG_ALL 0xffffffff + +#ifdef CONFIG_INFINIBAND_NES_DEBUG +#define nes_debug(level, fmt, args...) \ + if (level & nes_debug_level) \ + printk(KERN_ERR PFX "%s[%u]: " fmt, __FUNCTION__, __LINE__, ##args) + +#define assert(expr) \ +if (!(expr)) { \ + printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \ + #expr, __FILE__, __FUNCTION__, __LINE__); \ +} + +#define NES_EVENT_TIMEOUT 1200000 +#else +#define nes_debug(level, fmt, args...) +#define assert(expr) do {} while (0) + +#define NES_EVENT_TIMEOUT 100000 +#endif + +#include "nes_hw.h" +#include "nes_verbs.h" +#include "nes_context.h" +#include "nes_user.h" +#include "nes_cm.h" + +extern int max_mtu; +extern int nics_per_function; +#define max_frame_len (max_mtu+ETH_HLEN) +extern int interrupt_mod_interval; +extern int nes_if_count; +extern int mpa_version; +extern int disable_mpa_crc; +extern unsigned int send_first; +extern unsigned int nes_drv_opt; +extern unsigned int nes_debug_level; + +extern struct list_head nes_adapter_list; +extern struct list_head nes_dev_list; + +extern struct nes_cm_core *g_cm_core; + +extern atomic_t cm_connects; +extern atomic_t cm_accepts; +extern atomic_t cm_disconnects; +extern atomic_t cm_closes; +extern atomic_t cm_connecteds; +extern atomic_t cm_connect_reqs; +extern atomic_t cm_rejects; +extern atomic_t mod_qp_timouts; +extern atomic_t qps_created; +extern atomic_t qps_destroyed; +extern atomic_t sw_qps_destroyed; +extern u32 mh_detected; +extern u32 mh_pauses_sent; +extern u32 cm_packets_sent; +extern u32 cm_packets_bounced; +extern u32 cm_packets_created; +extern u32 cm_packets_received; +extern u32 cm_packets_dropped; +extern u32 cm_packets_retrans; +extern u32 cm_listens_created; +extern u32 cm_listens_destroyed; +extern u32 cm_backlog_drops; +extern atomic_t cm_loopbacks; +extern atomic_t cm_nodes_created; +extern atomic_t cm_nodes_destroyed; +extern atomic_t cm_accel_dropped_pkts; +extern atomic_t cm_resets_recvd; + +extern u32 crit_err_count; +extern u32 int_mod_timer_init; +extern u32 int_mod_cq_depth_256; +extern u32 int_mod_cq_depth_128; +extern u32 int_mod_cq_depth_32; +extern u32 int_mod_cq_depth_24; +extern u32 int_mod_cq_depth_16; +extern u32 int_mod_cq_depth_4; +extern u32 int_mod_cq_depth_1; + +extern atomic_t cqp_reqs_allocated; +extern atomic_t cqp_reqs_freed; +extern atomic_t cqp_reqs_dynallocated; +extern atomic_t cqp_reqs_dynfreed; +extern atomic_t cqp_reqs_queued; +extern atomic_t cqp_reqs_redriven; + + +struct nes_device { + struct nes_adapter *nesadapter; + void __iomem *regs; + void __iomem *index_reg; + struct pci_dev *pcidev; + struct net_device *netdev[NES_NIC_MAX_NICS]; + u64 link_status_interrupts; + struct tasklet_struct dpc_tasklet; + spinlock_t indexed_regs_lock; + unsigned long csr_start; + unsigned long doorbell_region; + unsigned long doorbell_start; + unsigned long mac_tx_errors; + unsigned long mac_pause_frames_sent; + unsigned long mac_pause_frames_received; + unsigned long mac_rx_errors; + unsigned long mac_rx_crc_errors; + unsigned long mac_rx_symbol_err_frames; + unsigned long mac_rx_jabber_frames; + unsigned long mac_rx_oversized_frames; + unsigned long mac_rx_short_frames; + unsigned long port_rx_discards; + unsigned long port_tx_discards; + unsigned int mac_index; + unsigned int nes_stack_start; + + /* Control Structures */ + void *cqp_vbase; + dma_addr_t cqp_pbase; + u32 cqp_mem_size; + u8 ceq_index; + u8 nic_ceq_index; + struct nes_hw_cqp cqp; + struct nes_hw_cq ccq; + struct list_head cqp_avail_reqs; + struct list_head cqp_pending_reqs; + struct nes_cqp_request *nes_cqp_requests; + + u32 int_req; + u32 int_stat; + u32 timer_int_req; + u32 timer_only_int_count; + u32 intf_int_req; + u32 last_mac_tx_pauses; + u32 last_used_chunks_tx; + struct list_head list; + + u16 base_doorbell_index; + u16 currcq_count; + u16 deepcq_count; + u8 msi_enabled; + u8 netdev_count; + u8 napi_isr_ran; + u8 disable_rx_flow_control; + u8 disable_tx_flow_control; +}; + + +static inline void +set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value) +{ + wqe_words[index] = cpu_to_le32((u32) ((unsigned long)value)); + wqe_words[index + 1] = cpu_to_le32((u32)(upper_32_bits((unsigned long)value))); +} + +static inline void +set_wqe_32bit_value(__le32 *wqe_words, u32 index, u32 value) +{ + wqe_words[index] = cpu_to_le32(value); +} + +static inline void +nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev) +{ + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_CTX_LOW_IDX, + (u64)((unsigned long) &nesdev->cqp)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_LEN_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_HIGH_IDX] = 0; +} + +static inline void +nes_fill_init_qp_wqe(struct nes_hw_qp_wqe *wqe, struct nes_qp *nesqp, u32 head) +{ + u32 value; + value = ((u32)((unsigned long) nesqp)) | head; + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX, + (u32)(upper_32_bits((unsigned long)(nesqp)))); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, value); +} + +/* Read from memory-mapped device */ +static inline u32 nes_read_indexed(struct nes_device *nesdev, u32 reg_index) +{ + unsigned long flags; + void __iomem *addr = nesdev->index_reg; + u32 value; + + spin_lock_irqsave(&nesdev->indexed_regs_lock, flags); + + writel(reg_index, addr); + value = readl((void __iomem *)addr + 4); + + spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags); + return value; +} + +static inline u32 nes_read32(const void __iomem *addr) +{ + return readl(addr); +} + +static inline u16 nes_read16(const void __iomem *addr) +{ + return readw(addr); +} + +static inline u8 nes_read8(const void __iomem *addr) +{ + return readb(addr); +} + +/* Write to memory-mapped device */ +static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u32 val) +{ + unsigned long flags; + void __iomem *addr = nesdev->index_reg; + + spin_lock_irqsave(&nesdev->indexed_regs_lock, flags); + + writel(reg_index, addr); + writel(val, (void __iomem *)addr + 4); + + spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags); +} + +static inline void nes_write32(void __iomem *addr, u32 val) +{ + writel(val, addr); +} + +static inline void nes_write16(void __iomem *addr, u16 val) +{ + writew(val, addr); +} + +static inline void nes_write8(void __iomem *addr, u8 val) +{ + writeb(val, addr); +} + + + +static inline int nes_alloc_resource(struct nes_adapter *nesadapter, + unsigned long *resource_array, u32 max_resources, + u32 *req_resource_num, u32 *next) +{ + unsigned long flags; + u32 resource_num; + + spin_lock_irqsave(&nesadapter->resource_lock, flags); + + resource_num = find_next_zero_bit(resource_array, max_resources, *next); + if (resource_num >= max_resources) { + resource_num = find_first_zero_bit(resource_array, max_resources); + if (resource_num >= max_resources) { + printk(KERN_ERR PFX "%s: No available resourcess.\n", __FUNCTION__); + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); + return -EMFILE; + } + } + set_bit(resource_num, resource_array); + *next = resource_num+1; + if (*next == max_resources) { + *next = 0; + } + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); + *req_resource_num = resource_num; + + return 0; +} + +static inline int nes_is_resource_allocated(struct nes_adapter *nesadapter, + unsigned long *resource_array, u32 resource_num) +{ + unsigned long flags; + int bit_is_set; + + spin_lock_irqsave(&nesadapter->resource_lock, flags); + + bit_is_set = test_bit(resource_num, resource_array); + nes_debug(NES_DBG_HW, "resource_num %u is%s allocated.\n", + resource_num, (bit_is_set ? "": " not")); + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); + + return bit_is_set; +} + +static inline void nes_free_resource(struct nes_adapter *nesadapter, + unsigned long *resource_array, u32 resource_num) +{ + unsigned long flags; + + spin_lock_irqsave(&nesadapter->resource_lock, flags); + clear_bit(resource_num, resource_array); + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); +} + +static inline struct nes_vnic *to_nesvnic(struct ib_device *ibdev) +{ + return container_of(ibdev, struct nes_ib_device, ibdev)->nesvnic; +} + +static inline struct nes_pd *to_nespd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct nes_pd, ibpd); +} + +static inline struct nes_ucontext *to_nesucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct nes_ucontext, ibucontext); +} + +static inline struct nes_mr *to_nesmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct nes_mr, ibmr); +} + +static inline struct nes_mr *to_nesmr_from_ibfmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct nes_mr, ibfmr); +} + +static inline struct nes_mr *to_nesmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct nes_mr, ibmw); +} + +static inline struct nes_fmr *to_nesfmr(struct nes_mr *nesmr) +{ + return container_of(nesmr, struct nes_fmr, nesmr); +} + +static inline struct nes_cq *to_nescq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct nes_cq, ibcq); +} + +static inline struct nes_qp *to_nesqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct nes_qp, ibqp); +} + + + +/* nes.c */ +void nes_add_ref(struct ib_qp *); +void nes_rem_ref(struct ib_qp *); +struct ib_qp *nes_get_qp(struct ib_device *, int); + + +/* nes_hw.c */ +struct nes_adapter *nes_init_adapter(struct nes_device *, u8); +void nes_nic_init_timer_defaults(struct nes_device *, u8); +unsigned int nes_reset_adapter_ne020(struct nes_device *, u8 *); +int nes_init_serdes(struct nes_device *, u8, u8, u8); +void nes_init_csr_ne020(struct nes_device *, u8, u8); +void nes_destroy_adapter(struct nes_adapter *); +int nes_init_cqp(struct nes_device *); +int nes_init_phy(struct nes_device *); +int nes_init_nic_qp(struct nes_device *, struct net_device *); +void nes_destroy_nic_qp(struct nes_vnic *); +int nes_napi_isr(struct nes_device *); +void nes_dpc(unsigned long); +void nes_process_ceq(struct nes_device *, struct nes_hw_ceq *); +void nes_process_aeq(struct nes_device *, struct nes_hw_aeq *); +void nes_process_mac_intr(struct nes_device *, u32); +void nes_nic_napi_ce_handler(struct nes_device *, struct nes_hw_nic_cq *); +void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *); +void nes_cqp_ce_handler(struct nes_device *, struct nes_hw_cq *); +void nes_process_iwarp_aeqe(struct nes_device *, struct nes_hw_aeqe *); +void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *); +int nes_destroy_cqp(struct nes_device *); +int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); + +/* nes_nic.c */ +void nes_netdev_set_multicast_list(struct net_device *); +void nes_netdev_exit(struct nes_vnic *); +struct net_device *nes_netdev_init(struct nes_device *, void __iomem *); +void nes_netdev_destroy(struct net_device *); +int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); + +/* nes_cm.c */ +void *nes_cm_create(struct net_device *); +int nes_cm_recv(struct sk_buff *, struct net_device *); +void nes_update_arp(unsigned char *, u32, u32, u16, u16); +void nes_manage_arp_cache(struct net_device *, unsigned char *, u32, u32); +void nes_sock_release(struct nes_qp *, unsigned long *); +struct nes_cm_core *nes_cm_alloc_core(void); +void flush_wqes(struct nes_device *nesdev, struct nes_qp *, u32, u32); +int nes_manage_apbvt(struct nes_vnic *, u32, u32, u32); +int nes_cm_disconn(struct nes_qp *); +void nes_cm_disconn_worker(void *); + +/* nes_verbs.c */ +int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32); +int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *); +struct nes_ib_device *nes_init_ofa_device(struct net_device *); +void nes_destroy_ofa_device(struct nes_ib_device *); +int nes_register_ofa_device(struct nes_ib_device *); +void nes_unregister_ofa_device(struct nes_ib_device *); + +/* nes_util.c */ +int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *); +void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16); +void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *); +void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16); +void nes_read_10G_phy_reg(struct nes_device *, u16, u8); +struct nes_cqp_request *nes_get_cqp_request(struct nes_device *); +void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int); +int nes_arp_table(struct nes_device *, u32, u8 *, u32); +void nes_mh_fix(unsigned long); +void nes_clc(unsigned long); +void nes_dump_mem(unsigned int, void *, int); +u32 nes_crc32(u32, u32, u32, u32, u8 *, u32, u32, u32); + +#endif /* __NES_H */ diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c new file mode 100644 index 000000000000..bd5cfeaac203 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -0,0 +1,3088 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + + +#define TCPOPT_TIMESTAMP 8 + +#include <asm/atomic.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/init.h> +#include <linux/if_arp.h> +#include <linux/notifier.h> +#include <linux/net.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/time.h> +#include <linux/delay.h> +#include <linux/etherdevice.h> +#include <linux/netdevice.h> +#include <linux/random.h> +#include <linux/list.h> +#include <linux/threads.h> + +#include <net/neighbour.h> +#include <net/route.h> +#include <net/ip_fib.h> + +#include "nes.h" + +u32 cm_packets_sent; +u32 cm_packets_bounced; +u32 cm_packets_dropped; +u32 cm_packets_retrans; +u32 cm_packets_created; +u32 cm_packets_received; +u32 cm_listens_created; +u32 cm_listens_destroyed; +u32 cm_backlog_drops; +atomic_t cm_loopbacks; +atomic_t cm_nodes_created; +atomic_t cm_nodes_destroyed; +atomic_t cm_accel_dropped_pkts; +atomic_t cm_resets_recvd; + +static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *); +static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, + struct nes_vnic *, struct nes_cm_info *); +static int add_ref_cm_node(struct nes_cm_node *); +static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); +static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *); + + +/* External CM API Interface */ +/* instance of function pointers for client API */ +/* set address of this instance to cm_core->cm_ops at cm_core alloc */ +static struct nes_cm_ops nes_cm_api = { + mini_cm_accelerated, + mini_cm_listen, + mini_cm_del_listen, + mini_cm_connect, + mini_cm_close, + mini_cm_accept, + mini_cm_reject, + mini_cm_recv_pkt, + mini_cm_dealloc_core, + mini_cm_get, + mini_cm_set +}; + +struct nes_cm_core *g_cm_core; + +atomic_t cm_connects; +atomic_t cm_accepts; +atomic_t cm_disconnects; +atomic_t cm_closes; +atomic_t cm_connecteds; +atomic_t cm_connect_reqs; +atomic_t cm_rejects; + + +/** + * create_event + */ +static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, + enum nes_cm_event_type type) +{ + struct nes_cm_event *event; + + if (!cm_node->cm_id) + return NULL; + + /* allocate an empty event */ + event = kzalloc(sizeof(*event), GFP_ATOMIC); + + if (!event) + return NULL; + + event->type = type; + event->cm_node = cm_node; + event->cm_info.rem_addr = cm_node->rem_addr; + event->cm_info.loc_addr = cm_node->loc_addr; + event->cm_info.rem_port = cm_node->rem_port; + event->cm_info.loc_port = cm_node->loc_port; + event->cm_info.cm_id = cm_node->cm_id; + + nes_debug(NES_DBG_CM, "Created event=%p, type=%u, dst_addr=%08x[%x]," + " src_addr=%08x[%x]\n", + event, type, + event->cm_info.loc_addr, event->cm_info.loc_port, + event->cm_info.rem_addr, event->cm_info.rem_port); + + nes_cm_post_event(event); + return event; +} + + +/** + * send_mpa_request + */ +int send_mpa_request(struct nes_cm_node *cm_node) +{ + struct sk_buff *skb; + int ret; + + skb = get_free_pkt(cm_node); + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + /* send an MPA Request frame */ + form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame, + cm_node->mpa_frame_size, SET_ACK); + + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + if (ret < 0) { + return ret; + } + + return 0; +} + + +/** + * recv_mpa - process a received TCP pkt, we are expecting an + * IETF MPA frame + */ +static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len) +{ + struct ietf_mpa_frame *mpa_frame; + + /* assume req frame is in tcp data payload */ + if (len < sizeof(struct ietf_mpa_frame)) { + nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len); + return -1; + } + + mpa_frame = (struct ietf_mpa_frame *)buffer; + cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len); + + if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) { + nes_debug(NES_DBG_CM, "The received ietf buffer was not right" + " complete (%x + %x != %x)\n", + cm_node->mpa_frame_size, (u32)sizeof(struct ietf_mpa_frame), len); + return -1; + } + + /* copy entire MPA frame to our cm_node's frame */ + memcpy(cm_node->mpa_frame_buf, buffer + sizeof(struct ietf_mpa_frame), + cm_node->mpa_frame_size); + + return 0; +} + + +/** + * handle_exception_pkt - process an exception packet. + * We have been in a TSA state, and we have now received SW + * TCP/IP traffic should be a FIN request or IP pkt with options + */ +static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + int ret = 0; + struct tcphdr *tcph = tcp_hdr(skb); + + /* first check to see if this a FIN pkt */ + if (tcph->fin) { + /* we need to ACK the FIN request */ + send_ack(cm_node); + + /* check which side we are (client/server) and set next state accordingly */ + if (cm_node->tcp_cntxt.client) + cm_node->state = NES_CM_STATE_CLOSING; + else { + /* we are the server side */ + cm_node->state = NES_CM_STATE_CLOSE_WAIT; + /* since this is a self contained CM we don't wait for */ + /* an APP to close us, just send final FIN immediately */ + ret = send_fin(cm_node, NULL); + cm_node->state = NES_CM_STATE_LAST_ACK; + } + } else { + ret = -EINVAL; + } + + return ret; +} + + +/** + * form_cm_frame - get a free packet and build empty frame Use + * node info to build. + */ +struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node, + void *options, u32 optionsize, void *data, u32 datasize, u8 flags) +{ + struct tcphdr *tcph; + struct iphdr *iph; + struct ethhdr *ethh; + u8 *buf; + u16 packetsize = sizeof(*iph); + + packetsize += sizeof(*tcph); + packetsize += optionsize + datasize; + + memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph)); + + skb->len = 0; + buf = skb_put(skb, packetsize + ETH_HLEN); + + ethh = (struct ethhdr *) buf; + buf += ETH_HLEN; + + iph = (struct iphdr *)buf; + buf += sizeof(*iph); + tcph = (struct tcphdr *)buf; + skb_reset_mac_header(skb); + skb_set_network_header(skb, ETH_HLEN); + skb_set_transport_header(skb, ETH_HLEN+sizeof(*iph)); + buf += sizeof(*tcph); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->protocol = htons(0x800); + skb->data_len = 0; + skb->mac_len = ETH_HLEN; + + memcpy(ethh->h_dest, cm_node->rem_mac, ETH_ALEN); + memcpy(ethh->h_source, cm_node->loc_mac, ETH_ALEN); + ethh->h_proto = htons(0x0800); + + iph->version = IPVERSION; + iph->ihl = 5; /* 5 * 4Byte words, IP headr len */ + iph->tos = 0; + iph->tot_len = htons(packetsize); + iph->id = htons(++cm_node->tcp_cntxt.loc_id); + + iph->frag_off = htons(0x4000); + iph->ttl = 0x40; + iph->protocol = 0x06; /* IPPROTO_TCP */ + + iph->saddr = htonl(cm_node->loc_addr); + iph->daddr = htonl(cm_node->rem_addr); + + tcph->source = htons(cm_node->loc_port); + tcph->dest = htons(cm_node->rem_port); + tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num); + + if (flags & SET_ACK) { + cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt; + tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num); + tcph->ack = 1; + } else + tcph->ack_seq = 0; + + if (flags & SET_SYN) { + cm_node->tcp_cntxt.loc_seq_num++; + tcph->syn = 1; + } else + cm_node->tcp_cntxt.loc_seq_num += datasize; /* data (no headers) */ + + if (flags & SET_FIN) + tcph->fin = 1; + + if (flags & SET_RST) + tcph->rst = 1; + + tcph->doff = (u16)((sizeof(*tcph) + optionsize + 3) >> 2); + tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd); + tcph->urg_ptr = 0; + if (optionsize) + memcpy(buf, options, optionsize); + buf += optionsize; + if (datasize) + memcpy(buf, data, datasize); + + skb_shinfo(skb)->nr_frags = 0; + cm_packets_created++; + + return skb; +} + + +/** + * print_core - dump a cm core + */ +static void print_core(struct nes_cm_core *core) +{ + nes_debug(NES_DBG_CM, "---------------------------------------------\n"); + nes_debug(NES_DBG_CM, "CM Core -- (core = %p )\n", core); + if (!core) + return; + nes_debug(NES_DBG_CM, "---------------------------------------------\n"); + nes_debug(NES_DBG_CM, "Session ID : %u \n", atomic_read(&core->session_id)); + + nes_debug(NES_DBG_CM, "State : %u \n", core->state); + + nes_debug(NES_DBG_CM, "Tx Free cnt : %u \n", skb_queue_len(&core->tx_free_list)); + nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt)); + nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt)); + + nes_debug(NES_DBG_CM, "core : %p \n", core); + + nes_debug(NES_DBG_CM, "-------------- end core ---------------\n"); +} + + +/** + * schedule_nes_timer + * note - cm_node needs to be protected before calling this. Encase in: + * rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node); + */ +int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, + enum nes_timer_type type, int send_retrans, + int close_when_complete) +{ + unsigned long flags; + struct nes_cm_core *cm_core; + struct nes_timer_entry *new_send; + int ret = 0; + u32 was_timer_set; + + new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); + if (!new_send) + return -1; + if (!cm_node) + return -EINVAL; + + /* new_send->timetosend = currenttime */ + new_send->retrycount = NES_DEFAULT_RETRYS; + new_send->retranscount = NES_DEFAULT_RETRANS; + new_send->skb = skb; + new_send->timetosend = jiffies; + new_send->type = type; + new_send->netdev = cm_node->netdev; + new_send->send_retrans = send_retrans; + new_send->close_when_complete = close_when_complete; + + if (type == NES_TIMER_TYPE_CLOSE) { + new_send->timetosend += (HZ/2); /* TODO: decide on the correct value here */ + spin_lock_irqsave(&cm_node->recv_list_lock, flags); + list_add_tail(&new_send->list, &cm_node->recv_list); + spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + } + + if (type == NES_TIMER_TYPE_SEND) { + new_send->seq_num = htonl(tcp_hdr(skb)->seq); + atomic_inc(&new_send->skb->users); + + ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); + if (ret != NETDEV_TX_OK) { + nes_debug(NES_DBG_CM, "Error sending packet %p (jiffies = %lu)\n", + new_send, jiffies); + atomic_dec(&new_send->skb->users); + new_send->timetosend = jiffies; + } else { + cm_packets_sent++; + if (!send_retrans) { + if (close_when_complete) + rem_ref_cm_node(cm_node->cm_core, cm_node); + dev_kfree_skb_any(new_send->skb); + kfree(new_send); + return ret; + } + new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; + } + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + list_add_tail(&new_send->list, &cm_node->retrans_list); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + } + if (type == NES_TIMER_TYPE_RECV) { + new_send->seq_num = htonl(tcp_hdr(skb)->seq); + new_send->timetosend = jiffies; + spin_lock_irqsave(&cm_node->recv_list_lock, flags); + list_add_tail(&new_send->list, &cm_node->recv_list); + spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + } + cm_core = cm_node->cm_core; + + was_timer_set = timer_pending(&cm_core->tcp_timer); + + if (!was_timer_set) { + cm_core->tcp_timer.expires = new_send->timetosend; + add_timer(&cm_core->tcp_timer); + } + + return ret; +} + + +/** + * nes_cm_timer_tick + */ +void nes_cm_timer_tick(unsigned long pass) +{ + unsigned long flags, qplockflags; + unsigned long nexttimeout = jiffies + NES_LONG_TIME; + struct iw_cm_id *cm_id; + struct nes_cm_node *cm_node; + struct nes_timer_entry *send_entry, *recv_entry; + struct list_head *list_core, *list_core_temp; + struct list_head *list_node, *list_node_temp; + struct nes_cm_core *cm_core = g_cm_core; + struct nes_qp *nesqp; + struct sk_buff *skb; + u32 settimer = 0; + int ret = NETDEV_TX_OK; + int node_done; + + spin_lock_irqsave(&cm_core->ht_lock, flags); + + list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { + cm_node = container_of(list_node, struct nes_cm_node, list); + add_ref_cm_node(cm_node); + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + spin_lock_irqsave(&cm_node->recv_list_lock, flags); + list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { + recv_entry = container_of(list_core, struct nes_timer_entry, list); + if ((time_after(recv_entry->timetosend, jiffies)) && + (recv_entry->type == NES_TIMER_TYPE_CLOSE)) { + if (nexttimeout > recv_entry->timetosend || !settimer) { + nexttimeout = recv_entry->timetosend; + settimer = 1; + } + continue; + } + list_del(&recv_entry->list); + cm_id = cm_node->cm_id; + spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + if (recv_entry->type == NES_TIMER_TYPE_CLOSE) { + nesqp = (struct nes_qp *)recv_entry->skb; + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d: " + "****** HIT A NES_TIMER_TYPE_CLOSE" + " with something to do!!! ******\n", + nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d:" + " ****** HIT A NES_TIMER_TYPE_CLOSE" + " with nothing to do!!! ******\n", + nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + nes_rem_ref(&nesqp->ibqp); + } + if (cm_id) + cm_id->rem_ref(cm_id); + } + kfree(recv_entry); + spin_lock_irqsave(&cm_node->recv_list_lock, flags); + } + spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + node_done = 0; + list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) { + if (node_done) { + break; + } + send_entry = container_of(list_core, struct nes_timer_entry, list); + if (time_after(send_entry->timetosend, jiffies)) { + if (cm_node->state != NES_CM_STATE_TSA) { + if ((nexttimeout > send_entry->timetosend) || !settimer) { + nexttimeout = send_entry->timetosend; + settimer = 1; + } + node_done = 1; + continue; + } else { + list_del(&send_entry->list); + skb = send_entry->skb; + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + dev_kfree_skb_any(skb); + kfree(send_entry); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } + } + if (send_entry->type == NES_TIMER_NODE_CLEANUP) { + list_del(&send_entry->list); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + kfree(send_entry); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } + if ((send_entry->seq_num < cm_node->tcp_cntxt.rem_ack_num) || + (cm_node->state == NES_CM_STATE_TSA) || + (cm_node->state == NES_CM_STATE_CLOSED)) { + skb = send_entry->skb; + list_del(&send_entry->list); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + kfree(send_entry); + dev_kfree_skb_any(skb); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } + + if (!send_entry->retranscount || !send_entry->retrycount) { + cm_packets_dropped++; + skb = send_entry->skb; + list_del(&send_entry->list); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + dev_kfree_skb_any(skb); + kfree(send_entry); + if (cm_node->state == NES_CM_STATE_SYN_RCVD) { + /* this node never even generated an indication up to the cm */ + rem_ref_cm_node(cm_core, cm_node); + } else { + cm_node->state = NES_CM_STATE_CLOSED; + create_event(cm_node, NES_CM_EVENT_ABORTED); + } + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } + /* this seems like the correct place, but leave send entry unprotected */ + // spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + atomic_inc(&send_entry->skb->users); + cm_packets_retrans++; + nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p," + " jiffies = %lu, time to send = %lu, retranscount = %u, " + "send_entry->seq_num = 0x%08X, cm_node->tcp_cntxt.rem_ack_num = 0x%08X\n", + send_entry, cm_node, jiffies, send_entry->timetosend, send_entry->retranscount, + send_entry->seq_num, cm_node->tcp_cntxt.rem_ack_num); + + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev); + if (ret != NETDEV_TX_OK) { + cm_packets_bounced++; + atomic_dec(&send_entry->skb->users); + send_entry->retrycount--; + nexttimeout = jiffies + NES_SHORT_TIME; + settimer = 1; + node_done = 1; + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } else { + cm_packets_sent++; + } + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + list_del(&send_entry->list); + nes_debug(NES_DBG_CM, "Packet Sent: retrans count = %u, retry count = %u.\n", + send_entry->retranscount, send_entry->retrycount); + if (send_entry->send_retrans) { + send_entry->retranscount--; + send_entry->timetosend = jiffies + NES_RETRY_TIMEOUT; + if (nexttimeout > send_entry->timetosend || !settimer) { + nexttimeout = send_entry->timetosend; + settimer = 1; + } + list_add(&send_entry->list, &cm_node->retrans_list); + continue; + } else { + int close_when_complete; + skb = send_entry->skb; + close_when_complete = send_entry->close_when_complete; + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + if (close_when_complete) { + BUG_ON(atomic_read(&cm_node->ref_count) == 1); + rem_ref_cm_node(cm_core, cm_node); + } + dev_kfree_skb_any(skb); + kfree(send_entry); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } + } + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + + rem_ref_cm_node(cm_core, cm_node); + + spin_lock_irqsave(&cm_core->ht_lock, flags); + if (ret != NETDEV_TX_OK) + break; + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + if (settimer) { + if (!timer_pending(&cm_core->tcp_timer)) { + cm_core->tcp_timer.expires = nexttimeout; + add_timer(&cm_core->tcp_timer); + } + } +} + + +/** + * send_syn + */ +int send_syn(struct nes_cm_node *cm_node, u32 sendack) +{ + int ret; + int flags = SET_SYN; + struct sk_buff *skb; + char optionsbuffer[sizeof(struct option_mss) + + sizeof(struct option_windowscale) + + sizeof(struct option_base) + 1]; + + int optionssize = 0; + /* Sending MSS option */ + union all_known_options *options; + + if (!cm_node) + return -EINVAL; + + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_mss.optionnum = OPTION_NUMBER_MSS; + options->as_mss.length = sizeof(struct option_mss); + options->as_mss.mss = htons(cm_node->tcp_cntxt.mss); + optionssize += sizeof(struct option_mss); + + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE; + options->as_windowscale.length = sizeof(struct option_windowscale); + options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; + optionssize += sizeof(struct option_windowscale); + + if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt) + ) { + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_base.optionnum = OPTION_NUMBER_WRITE0; + options->as_base.length = sizeof(struct option_base); + optionssize += sizeof(struct option_base); + /* we need the size to be a multiple of 4 */ + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_end = 1; + optionssize += 1; + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_end = 1; + optionssize += 1; + } + + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_end = OPTION_NUMBER_END; + optionssize += 1; + + skb = get_free_pkt(cm_node); + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + if (sendack) + flags |= SET_ACK; + + form_cm_frame(skb, cm_node, optionsbuffer, optionssize, NULL, 0, flags); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + + return ret; +} + + +/** + * send_reset + */ +int send_reset(struct nes_cm_node *cm_node) +{ + int ret; + struct sk_buff *skb = get_free_pkt(cm_node); + int flags = SET_RST | SET_ACK; + + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + add_ref_cm_node(cm_node); + form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1); + + return ret; +} + + +/** + * send_ack + */ +int send_ack(struct nes_cm_node *cm_node) +{ + int ret; + struct sk_buff *skb = get_free_pkt(cm_node); + + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 0); + + return ret; +} + + +/** + * send_fin + */ +int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + int ret; + + /* if we didn't get a frame get one */ + if (!skb) + skb = get_free_pkt(cm_node); + + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK | SET_FIN); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + + return ret; +} + + +/** + * get_free_pkt + */ +struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node) +{ + struct sk_buff *skb, *new_skb; + + /* check to see if we need to repopulate the free tx pkt queue */ + if (skb_queue_len(&cm_node->cm_core->tx_free_list) < NES_CM_FREE_PKT_LO_WATERMARK) { + while (skb_queue_len(&cm_node->cm_core->tx_free_list) < + cm_node->cm_core->free_tx_pkt_max) { + /* replace the frame we took, we won't get it back */ + new_skb = dev_alloc_skb(cm_node->cm_core->mtu); + BUG_ON(!new_skb); + /* add a replacement frame to the free tx list head */ + skb_queue_head(&cm_node->cm_core->tx_free_list, new_skb); + } + } + + skb = skb_dequeue(&cm_node->cm_core->tx_free_list); + + return skb; +} + + +/** + * make_hashkey - generate hash key from node tuple + */ +static inline int make_hashkey(u16 loc_port, nes_addr_t loc_addr, u16 rem_port, + nes_addr_t rem_addr) +{ + u32 hashkey = 0; + + hashkey = loc_addr + rem_addr + loc_port + rem_port; + hashkey = (hashkey % NES_CM_HASHTABLE_SIZE); + + return hashkey; +} + + +/** + * find_node - find a cm node that matches the reference cm node + */ +static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, + u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr) +{ + unsigned long flags; + u32 hashkey; + struct list_head *list_pos; + struct list_head *hte; + struct nes_cm_node *cm_node; + + /* make a hash index key for this packet */ + hashkey = make_hashkey(loc_port, loc_addr, rem_port, rem_addr); + + /* get a handle on the hte */ + hte = &cm_core->connected_nodes; + + nes_debug(NES_DBG_CM, "Searching for an owner node:%x:%x from core %p->%p\n", + loc_addr, loc_port, cm_core, hte); + + /* walk list and find cm_node associated with this session ID */ + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_for_each(list_pos, hte) { + cm_node = container_of(list_pos, struct nes_cm_node, list); + /* compare quad, return node handle if a match */ + nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n", + cm_node->loc_addr, cm_node->loc_port, + loc_addr, loc_port, + cm_node->rem_addr, cm_node->rem_port, + rem_addr, rem_port); + if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) && + (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) { + add_ref_cm_node(cm_node); + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + return cm_node; + } + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + /* no owner node */ + return NULL; +} + + +/** + * find_listener - find a cm node listening on this addr-port pair + */ +static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, + nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state) +{ + unsigned long flags; + struct list_head *listen_list; + struct nes_cm_listener *listen_node; + + /* walk list and find cm_node associated with this session ID */ + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + list_for_each(listen_list, &cm_core->listen_list.list) { + listen_node = container_of(listen_list, struct nes_cm_listener, list); + /* compare node pair, return node handle if a match */ + if (((listen_node->loc_addr == dst_addr) || + listen_node->loc_addr == 0x00000000) && + (listen_node->loc_port == dst_port) && + (listener_state & listen_node->listener_state)) { + atomic_inc(&listen_node->ref_count); + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + return listen_node; + } + } + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + + nes_debug(NES_DBG_CM, "Unable to find listener- %x:%x\n", + dst_addr, dst_port); + + /* no listener */ + return NULL; +} + + +/** + * add_hte_node - add a cm node to the hash table + */ +static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) +{ + unsigned long flags; + u32 hashkey; + struct list_head *hte; + + if (!cm_node || !cm_core) + return -EINVAL; + + nes_debug(NES_DBG_CM, "Adding Node to Active Connection HT\n"); + + /* first, make an index into our hash table */ + hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr, + cm_node->rem_port, cm_node->rem_addr); + cm_node->hashkey = hashkey; + + spin_lock_irqsave(&cm_core->ht_lock, flags); + + /* get a handle on the hash table element (list head for this slot) */ + hte = &cm_core->connected_nodes; + list_add_tail(&cm_node->list, hte); + atomic_inc(&cm_core->ht_node_cnt); + + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + return 0; +} + + +/** + * mini_cm_dec_refcnt_listen + */ +static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, + struct nes_cm_listener *listener, int free_hanging_nodes) +{ + int ret = 1; + unsigned long flags; + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + if (!atomic_dec_return(&listener->ref_count)) { + list_del(&listener->list); + + /* decrement our listen node count */ + atomic_dec(&cm_core->listen_node_cnt); + + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + + if (listener->nesvnic) { + nes_manage_apbvt(listener->nesvnic, listener->loc_port, + PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); + } + + nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); + + kfree(listener); + ret = 0; + cm_listens_destroyed++; + } else { + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + } + if (listener) { + if (atomic_read(&listener->pend_accepts_cnt) > 0) + nes_debug(NES_DBG_CM, "destroying listener (%p)" + " with non-zero pending accepts=%u\n", + listener, atomic_read(&listener->pend_accepts_cnt)); + } + + return ret; +} + + +/** + * mini_cm_del_listen + */ +static int mini_cm_del_listen(struct nes_cm_core *cm_core, + struct nes_cm_listener *listener) +{ + listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE; + listener->cm_id = NULL; /* going to be destroyed pretty soon */ + return mini_cm_dec_refcnt_listen(cm_core, listener, 1); +} + + +/** + * mini_cm_accelerated + */ +static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, + struct nes_cm_node *cm_node) +{ + u32 was_timer_set; + cm_node->accelerated = 1; + + if (cm_node->accept_pend) { + BUG_ON(!cm_node->listener); + atomic_dec(&cm_node->listener->pend_accepts_cnt); + BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); + } + + was_timer_set = timer_pending(&cm_core->tcp_timer); + if (!was_timer_set) { + cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME; + add_timer(&cm_core->tcp_timer); + } + + return 0; +} + + +/** + * nes_addr_send_arp + */ +static void nes_addr_send_arp(u32 dst_ip) +{ + struct rtable *rt; + struct flowi fl; + + memset(&fl, 0, sizeof fl); + fl.nl_u.ip4_u.daddr = htonl(dst_ip); + if (ip_route_output_key(&init_net, &rt, &fl)) { + printk("%s: ip_route_output_key failed for 0x%08X\n", + __FUNCTION__, dst_ip); + return; + } + + neigh_event_send(rt->u.dst.neighbour, NULL); + ip_rt_put(rt); +} + + +/** + * make_cm_node - create a new instance of a cm node + */ +static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info, + struct nes_cm_listener *listener) +{ + struct nes_cm_node *cm_node; + struct timespec ts; + int arpindex = 0; + struct nes_device *nesdev; + struct nes_adapter *nesadapter; + + /* create an hte and cm_node for this instance */ + cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC); + if (!cm_node) + return NULL; + + /* set our node specific transport info */ + cm_node->loc_addr = cm_info->loc_addr; + cm_node->rem_addr = cm_info->rem_addr; + cm_node->loc_port = cm_info->loc_port; + cm_node->rem_port = cm_info->rem_port; + cm_node->send_write0 = send_first; + nes_debug(NES_DBG_CM, "Make node addresses : loc = %x:%x, rem = %x:%x\n", + cm_node->loc_addr, cm_node->loc_port, cm_node->rem_addr, cm_node->rem_port); + cm_node->listener = listener; + cm_node->netdev = nesvnic->netdev; + cm_node->cm_id = cm_info->cm_id; + memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); + + nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", + cm_node->listener, cm_node->cm_id); + + INIT_LIST_HEAD(&cm_node->retrans_list); + spin_lock_init(&cm_node->retrans_list_lock); + INIT_LIST_HEAD(&cm_node->recv_list); + spin_lock_init(&cm_node->recv_list_lock); + + cm_node->loopbackpartner = NULL; + atomic_set(&cm_node->ref_count, 1); + /* associate our parent CM core */ + cm_node->cm_core = cm_core; + cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID; + cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; + cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> + NES_CM_DEFAULT_RCV_WND_SCALE; + ts = current_kernel_time(); + cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); + cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - + sizeof(struct tcphdr) - ETH_HLEN; + cm_node->tcp_cntxt.rcv_nxt = 0; + /* get a unique session ID , add thread_id to an upcounter to handle race */ + atomic_inc(&cm_core->node_cnt); + atomic_inc(&cm_core->session_id); + cm_node->session_id = (u32)(atomic_read(&cm_core->session_id) + current->tgid); + cm_node->conn_type = cm_info->conn_type; + cm_node->apbvt_set = 0; + cm_node->accept_pend = 0; + + cm_node->nesvnic = nesvnic; + /* get some device handles, for arp lookup */ + nesdev = nesvnic->nesdev; + nesadapter = nesdev->nesadapter; + + cm_node->loopbackpartner = NULL; + /* get the mac addr for the remote node */ + arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); + if (arpindex < 0) { + kfree(cm_node); + nes_addr_send_arp(cm_info->rem_addr); + return NULL; + } + + /* copy the mac addr to node context */ + memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN); + nes_debug(NES_DBG_CM, "Remote mac addr from arp table:%02x," + " %02x, %02x, %02x, %02x, %02x\n", + cm_node->rem_mac[0], cm_node->rem_mac[1], + cm_node->rem_mac[2], cm_node->rem_mac[3], + cm_node->rem_mac[4], cm_node->rem_mac[5]); + + add_hte_node(cm_core, cm_node); + atomic_inc(&cm_nodes_created); + + return cm_node; +} + + +/** + * add_ref_cm_node - destroy an instance of a cm node + */ +static int add_ref_cm_node(struct nes_cm_node *cm_node) +{ + atomic_inc(&cm_node->ref_count); + return 0; +} + + +/** + * rem_ref_cm_node - destroy an instance of a cm node + */ +static int rem_ref_cm_node(struct nes_cm_core *cm_core, + struct nes_cm_node *cm_node) +{ + unsigned long flags, qplockflags; + struct nes_timer_entry *send_entry; + struct nes_timer_entry *recv_entry; + struct iw_cm_id *cm_id; + struct list_head *list_core, *list_node_temp; + struct nes_qp *nesqp; + + if (!cm_node) + return -EINVAL; + + spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags); + if (atomic_dec_return(&cm_node->ref_count)) { + spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags); + return 0; + } + list_del(&cm_node->list); + atomic_dec(&cm_core->ht_node_cnt); + spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags); + + /* if the node is destroyed before connection was accelerated */ + if (!cm_node->accelerated && cm_node->accept_pend) { + BUG_ON(!cm_node->listener); + atomic_dec(&cm_node->listener->pend_accepts_cnt); + BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); + } + + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) { + send_entry = container_of(list_core, struct nes_timer_entry, list); + list_del(&send_entry->list); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + dev_kfree_skb_any(send_entry->skb); + kfree(send_entry); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + + spin_lock_irqsave(&cm_node->recv_list_lock, flags); + list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { + recv_entry = container_of(list_core, struct nes_timer_entry, list); + list_del(&recv_entry->list); + cm_id = cm_node->cm_id; + spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + if (recv_entry->type == NES_TIMER_TYPE_CLOSE) { + nesqp = (struct nes_qp *)recv_entry->skb; + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE" + " with something to do!!! ******\n", + nesqp->hwqp.qp_id, cm_id); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE" + " with nothing to do!!! ******\n", + nesqp->hwqp.qp_id, cm_id); + nes_rem_ref(&nesqp->ibqp); + } + cm_id->rem_ref(cm_id); + } else if (recv_entry->type == NES_TIMER_TYPE_RECV) { + dev_kfree_skb_any(recv_entry->skb); + } + kfree(recv_entry); + spin_lock_irqsave(&cm_node->recv_list_lock, flags); + } + spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + + if (cm_node->listener) { + mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0); + } else { + if (cm_node->apbvt_set && cm_node->nesvnic) { + nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, + PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); + } + } + + kfree(cm_node); + atomic_dec(&cm_core->node_cnt); + atomic_inc(&cm_nodes_destroyed); + + return 0; +} + + +/** + * process_options + */ +static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 optionsize, u32 syn_packet) +{ + u32 tmp; + u32 offset = 0; + union all_known_options *all_options; + char got_mss_option = 0; + + while (offset < optionsize) { + all_options = (union all_known_options *)(optionsloc + offset); + switch (all_options->as_base.optionnum) { + case OPTION_NUMBER_END: + offset = optionsize; + break; + case OPTION_NUMBER_NONE: + offset += 1; + continue; + case OPTION_NUMBER_MSS: + nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d Size: %d\n", + __FUNCTION__, + all_options->as_mss.length, offset, optionsize); + got_mss_option = 1; + if (all_options->as_mss.length != 4) { + return 1; + } else { + tmp = ntohs(all_options->as_mss.mss); + if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss) + cm_node->tcp_cntxt.mss = tmp; + } + break; + case OPTION_NUMBER_WINDOW_SCALE: + cm_node->tcp_cntxt.snd_wscale = all_options->as_windowscale.shiftcount; + break; + case OPTION_NUMBER_WRITE0: + cm_node->send_write0 = 1; + break; + default: + nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", + all_options->as_base.optionnum); + break; + } + offset += all_options->as_base.length; + } + if ((!got_mss_option) && (syn_packet)) + cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS; + return 0; +} + + +/** + * process_packet + */ +int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct nes_cm_core *cm_core) +{ + int optionsize; + int datasize; + int ret = 0; + struct tcphdr *tcph = tcp_hdr(skb); + u32 inc_sequence; + if (cm_node->state == NES_CM_STATE_SYN_SENT && tcph->syn) { + inc_sequence = ntohl(tcph->seq); + cm_node->tcp_cntxt.rcv_nxt = inc_sequence; + } + + if ((!tcph) || (cm_node->state == NES_CM_STATE_TSA)) { + BUG_ON(!tcph); + atomic_inc(&cm_accel_dropped_pkts); + return -1; + } + + if (tcph->rst) { + atomic_inc(&cm_resets_recvd); + nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u. refcnt=%d\n", + cm_node, cm_node->state, atomic_read(&cm_node->ref_count)); + switch (cm_node->state) { + case NES_CM_STATE_LISTENING: + rem_ref_cm_node(cm_core, cm_node); + break; + case NES_CM_STATE_TSA: + case NES_CM_STATE_CLOSED: + break; + case NES_CM_STATE_SYN_RCVD: + nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X," + " remote 0x%08X:%04X, node state = %u\n", + cm_node->loc_addr, cm_node->loc_port, + cm_node->rem_addr, cm_node->rem_port, + cm_node->state); + rem_ref_cm_node(cm_core, cm_node); + break; + case NES_CM_STATE_ONE_SIDE_ESTABLISHED: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_MPAREQ_SENT: + default: + nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X," + " remote 0x%08X:%04X, node state = %u refcnt=%d\n", + cm_node->loc_addr, cm_node->loc_port, + cm_node->rem_addr, cm_node->rem_port, + cm_node->state, atomic_read(&cm_node->ref_count)); + // create event + cm_node->state = NES_CM_STATE_CLOSED; + + create_event(cm_node, NES_CM_EVENT_ABORTED); + break; + + } + return -1; + } + + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); + + skb_pull(skb, ip_hdr(skb)->ihl << 2); + skb_pull(skb, tcph->doff << 2); + + datasize = skb->len; + inc_sequence = ntohl(tcph->seq); + nes_debug(NES_DBG_CM, "datasize = %u, sequence = 0x%08X, ack_seq = 0x%08X," + " rcv_nxt = 0x%08X Flags: %s %s.\n", + datasize, inc_sequence, ntohl(tcph->ack_seq), + cm_node->tcp_cntxt.rcv_nxt, (tcph->syn ? "SYN":""), + (tcph->ack ? "ACK":"")); + + if (!tcph->syn && (inc_sequence != cm_node->tcp_cntxt.rcv_nxt) + ) { + nes_debug(NES_DBG_CM, "dropping packet, datasize = %u, sequence = 0x%08X," + " ack_seq = 0x%08X, rcv_nxt = 0x%08X Flags: %s.\n", + datasize, inc_sequence, ntohl(tcph->ack_seq), + cm_node->tcp_cntxt.rcv_nxt, (tcph->ack ? "ACK":"")); + if (cm_node->state == NES_CM_STATE_LISTENING) { + rem_ref_cm_node(cm_core, cm_node); + } + return -1; + } + + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + + + if (optionsize) { + u8 *optionsloc = (u8 *)&tcph[1]; + if (process_options(cm_node, optionsloc, optionsize, (u32)tcph->syn)) { + nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", __FUNCTION__, cm_node); + send_reset(cm_node); + if (cm_node->state != NES_CM_STATE_SYN_SENT) + rem_ref_cm_node(cm_core, cm_node); + return 0; + } + } else if (tcph->syn) + cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS; + + cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) << + cm_node->tcp_cntxt.snd_wscale; + + if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) { + cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; + } + + if (tcph->ack) { + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + /* read and stash current sequence number */ + if (cm_node->tcp_cntxt.rem_ack_num != cm_node->tcp_cntxt.loc_seq_num) { + nes_debug(NES_DBG_CM, "ERROR - cm_node->tcp_cntxt.rem_ack_num !=" + " cm_node->tcp_cntxt.loc_seq_num\n"); + send_reset(cm_node); + return 0; + } + if (cm_node->state == NES_CM_STATE_SYN_SENT) + cm_node->state = NES_CM_STATE_ONE_SIDE_ESTABLISHED; + else { + cm_node->state = NES_CM_STATE_ESTABLISHED; + } + break; + case NES_CM_STATE_LAST_ACK: + cm_node->state = NES_CM_STATE_CLOSED; + break; + case NES_CM_STATE_FIN_WAIT1: + cm_node->state = NES_CM_STATE_FIN_WAIT2; + break; + case NES_CM_STATE_CLOSING: + cm_node->state = NES_CM_STATE_TIME_WAIT; + /* need to schedule this to happen in 2MSL timeouts */ + cm_node->state = NES_CM_STATE_CLOSED; + break; + case NES_CM_STATE_ONE_SIDE_ESTABLISHED: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_MPAREQ_SENT: + case NES_CM_STATE_CLOSE_WAIT: + case NES_CM_STATE_TIME_WAIT: + case NES_CM_STATE_CLOSED: + break; + case NES_CM_STATE_LISTENING: + nes_debug(NES_DBG_CM, "Received an ACK on a listening port (SYN %d)\n", tcph->syn); + cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); + send_reset(cm_node); + /* send_reset bumps refcount, this should have been a new node */ + rem_ref_cm_node(cm_core, cm_node); + return -1; + break; + case NES_CM_STATE_TSA: + nes_debug(NES_DBG_CM, "Received a packet with the ack bit set while in TSA state\n"); + break; + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_INITED: + case NES_CM_STATE_ACCEPTING: + case NES_CM_STATE_FIN_WAIT2: + default: + nes_debug(NES_DBG_CM, "Received ack from unknown state: %x\n", + cm_node->state); + send_reset(cm_node); + break; + } + } + + if (tcph->syn) { + if (cm_node->state == NES_CM_STATE_LISTENING) { + /* do not exceed backlog */ + atomic_inc(&cm_node->listener->pend_accepts_cnt); + if (atomic_read(&cm_node->listener->pend_accepts_cnt) > + cm_node->listener->backlog) { + nes_debug(NES_DBG_CM, "drop syn due to backlog pressure \n"); + cm_backlog_drops++; + atomic_dec(&cm_node->listener->pend_accepts_cnt); + rem_ref_cm_node(cm_core, cm_node); + return 0; + } + cm_node->accept_pend = 1; + + } + if (datasize == 0) + cm_node->tcp_cntxt.rcv_nxt ++; + + if (cm_node->state == NES_CM_STATE_LISTENING) { + cm_node->state = NES_CM_STATE_SYN_RCVD; + send_syn(cm_node, 1); + } + if (cm_node->state == NES_CM_STATE_ONE_SIDE_ESTABLISHED) { + cm_node->state = NES_CM_STATE_ESTABLISHED; + /* send final handshake ACK */ + ret = send_ack(cm_node); + if (ret < 0) + return ret; + + cm_node->state = NES_CM_STATE_MPAREQ_SENT; + ret = send_mpa_request(cm_node); + if (ret < 0) + return ret; + } + } + + if (tcph->fin) { + cm_node->tcp_cntxt.rcv_nxt++; + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ONE_SIDE_ESTABLISHED: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_ACCEPTING: + case NES_CM_STATE_MPAREQ_SENT: + cm_node->state = NES_CM_STATE_CLOSE_WAIT; + cm_node->state = NES_CM_STATE_LAST_ACK; + ret = send_fin(cm_node, NULL); + break; + case NES_CM_STATE_FIN_WAIT1: + cm_node->state = NES_CM_STATE_CLOSING; + ret = send_ack(cm_node); + break; + case NES_CM_STATE_FIN_WAIT2: + cm_node->state = NES_CM_STATE_TIME_WAIT; + cm_node->tcp_cntxt.loc_seq_num ++; + ret = send_ack(cm_node); + /* need to schedule this to happen in 2MSL timeouts */ + cm_node->state = NES_CM_STATE_CLOSED; + break; + case NES_CM_STATE_CLOSE_WAIT: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_TSA: + default: + nes_debug(NES_DBG_CM, "Received a fin while in %x state\n", + cm_node->state); + ret = -EINVAL; + break; + } + } + + if (datasize) { + u8 *dataloc = skb->data; + /* figure out what state we are in and handle transition to next state */ + switch (cm_node->state) { + case NES_CM_STATE_LISTENING: + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_CLOSE_WAIT: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + break; + case NES_CM_STATE_MPAREQ_SENT: + /* recv the mpa res frame, ret=frame len (incl priv data) */ + ret = parse_mpa(cm_node, dataloc, datasize); + if (ret < 0) + break; + /* set the req frame payload len in skb */ + /* we are done handling this state, set node to a TSA state */ + cm_node->state = NES_CM_STATE_TSA; + send_ack(cm_node); + create_event(cm_node, NES_CM_EVENT_CONNECTED); + break; + + case NES_CM_STATE_ESTABLISHED: + /* we are expecting an MPA req frame */ + ret = parse_mpa(cm_node, dataloc, datasize); + if (ret < 0) { + break; + } + cm_node->state = NES_CM_STATE_TSA; + send_ack(cm_node); + /* we got a valid MPA request, create an event */ + create_event(cm_node, NES_CM_EVENT_MPA_REQ); + break; + case NES_CM_STATE_TSA: + handle_exception_pkt(cm_node, skb); + break; + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_INITED: + default: + ret = -1; + } + } + + return ret; +} + + +/** + * mini_cm_listen - create a listen node with params + */ +static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) +{ + struct nes_cm_listener *listener; + unsigned long flags; + + nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n", + cm_info->loc_addr, cm_info->loc_port); + + /* cannot have multiple matching listeners */ + listener = find_listener(cm_core, htonl(cm_info->loc_addr), + htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE); + if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) { + /* find automatically incs ref count ??? */ + atomic_dec(&listener->ref_count); + nes_debug(NES_DBG_CM, "Not creating listener since it already exists\n"); + return NULL; + } + + if (!listener) { + /* create a CM listen node (1/2 node to compare incoming traffic to) */ + listener = kzalloc(sizeof(*listener), GFP_ATOMIC); + if (!listener) { + nes_debug(NES_DBG_CM, "Not creating listener memory allocation failed\n"); + return NULL; + } + + memset(listener, 0, sizeof(struct nes_cm_listener)); + listener->loc_addr = htonl(cm_info->loc_addr); + listener->loc_port = htons(cm_info->loc_port); + listener->reused_node = 0; + + atomic_set(&listener->ref_count, 1); + } + /* pasive case */ + /* find already inc'ed the ref count */ + else { + listener->reused_node = 1; + } + + listener->cm_id = cm_info->cm_id; + atomic_set(&listener->pend_accepts_cnt, 0); + listener->cm_core = cm_core; + listener->nesvnic = nesvnic; + atomic_inc(&cm_core->node_cnt); + atomic_inc(&cm_core->session_id); + + listener->session_id = (u32)(atomic_read(&cm_core->session_id) + current->tgid); + listener->conn_type = cm_info->conn_type; + listener->backlog = cm_info->backlog; + listener->listener_state = NES_CM_LISTENER_ACTIVE_STATE; + + if (!listener->reused_node) { + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + list_add(&listener->list, &cm_core->listen_list.list); + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + atomic_inc(&cm_core->listen_node_cnt); + } + + nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x," + " listener = %p, backlog = %d, cm_id = %p.\n", + cm_info->loc_addr, cm_info->loc_port, + listener, listener->backlog, listener->cm_id); + + return listener; +} + + +/** + * mini_cm_connect - make a connection node with params + */ +struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, struct ietf_mpa_frame *mpa_frame, + struct nes_cm_info *cm_info) +{ + int ret = 0; + struct nes_cm_node *cm_node; + struct nes_cm_listener *loopbackremotelistener; + struct nes_cm_node *loopbackremotenode; + struct nes_cm_info loopback_cm_info; + + u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + + ntohs(mpa_frame->priv_data_len); + + cm_info->loc_addr = htonl(cm_info->loc_addr); + cm_info->rem_addr = htonl(cm_info->rem_addr); + cm_info->loc_port = htons(cm_info->loc_port); + cm_info->rem_port = htons(cm_info->rem_port); + + /* create a CM connection node */ + cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL); + if (!cm_node) + return NULL; + + // set our node side to client (active) side + cm_node->tcp_cntxt.client = 1; + cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; + + if (cm_info->loc_addr == cm_info->rem_addr) { + loopbackremotelistener = find_listener(cm_core, cm_node->rem_addr, + cm_node->rem_port, NES_CM_LISTENER_ACTIVE_STATE); + if (loopbackremotelistener == NULL) { + create_event(cm_node, NES_CM_EVENT_ABORTED); + } else { + atomic_inc(&cm_loopbacks); + loopback_cm_info = *cm_info; + loopback_cm_info.loc_port = cm_info->rem_port; + loopback_cm_info.rem_port = cm_info->loc_port; + loopback_cm_info.cm_id = loopbackremotelistener->cm_id; + loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info, + loopbackremotelistener); + loopbackremotenode->loopbackpartner = cm_node; + loopbackremotenode->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; + cm_node->loopbackpartner = loopbackremotenode; + memcpy(loopbackremotenode->mpa_frame_buf, &mpa_frame->priv_data, + mpa_frame_size); + loopbackremotenode->mpa_frame_size = mpa_frame_size - + sizeof(struct ietf_mpa_frame); + + // we are done handling this state, set node to a TSA state + cm_node->state = NES_CM_STATE_TSA; + cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num; + loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num; + cm_node->tcp_cntxt.max_snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wscale = loopbackremotenode->tcp_cntxt.rcv_wscale; + loopbackremotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale; + + create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ); + } + return cm_node; + } + + /* set our node side to client (active) side */ + cm_node->tcp_cntxt.client = 1; + /* init our MPA frame ptr */ + memcpy(&cm_node->mpa_frame, mpa_frame, mpa_frame_size); + cm_node->mpa_frame_size = mpa_frame_size; + + /* send a syn and goto syn sent state */ + cm_node->state = NES_CM_STATE_SYN_SENT; + ret = send_syn(cm_node, 0); + + nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X, port=0x%04x," + " cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, cm_node->cm_id); + + return cm_node; +} + + +/** + * mini_cm_accept - accept a connection + * This function is never called + */ +int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame, + struct nes_cm_node *cm_node) +{ + return 0; +} + + +/** + * mini_cm_reject - reject and teardown a connection + */ +int mini_cm_reject(struct nes_cm_core *cm_core, + struct ietf_mpa_frame *mpa_frame, + struct nes_cm_node *cm_node) +{ + int ret = 0; + struct sk_buff *skb; + u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + + ntohs(mpa_frame->priv_data_len); + + skb = get_free_pkt(cm_node); + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + /* send an MPA Request frame */ + form_cm_frame(skb, cm_node, NULL, 0, mpa_frame, mpa_frame_size, SET_ACK | SET_FIN); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + + cm_node->state = NES_CM_STATE_CLOSED; + ret = send_fin(cm_node, NULL); + + if (ret < 0) { + printk(KERN_INFO PFX "failed to send MPA Reply (reject)\n"); + return ret; + } + + return ret; +} + + +/** + * mini_cm_close + */ +int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) +{ + int ret = 0; + + if (!cm_core || !cm_node) + return -EINVAL; + + switch (cm_node->state) { + /* if passed in node is null, create a reference key node for node search */ + /* check if we found an owner node for this pkt */ + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ONE_SIDE_ESTABLISHED: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_ACCEPTING: + case NES_CM_STATE_MPAREQ_SENT: + cm_node->state = NES_CM_STATE_FIN_WAIT1; + send_fin(cm_node, NULL); + break; + case NES_CM_STATE_CLOSE_WAIT: + cm_node->state = NES_CM_STATE_LAST_ACK; + send_fin(cm_node, NULL); + break; + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_TIME_WAIT: + case NES_CM_STATE_CLOSING: + ret = -1; + break; + case NES_CM_STATE_LISTENING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_INITED: + case NES_CM_STATE_CLOSED: + case NES_CM_STATE_TSA: + ret = rem_ref_cm_node(cm_core, cm_node); + break; + } + cm_node->cm_id = NULL; + return ret; +} + + +/** + * recv_pkt - recv an ETHERNET packet, and process it through CM + * node state machine + */ +int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, + struct sk_buff *skb) +{ + struct nes_cm_node *cm_node = NULL; + struct nes_cm_listener *listener = NULL; + struct iphdr *iph; + struct tcphdr *tcph; + struct nes_cm_info nfo; + int ret = 0; + + if (!skb || skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) { + ret = -EINVAL; + goto out; + } + + iph = (struct iphdr *)skb->data; + tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr)); + skb_reset_network_header(skb); + skb_set_transport_header(skb, sizeof(*tcph)); + skb->len = ntohs(iph->tot_len); + + nfo.loc_addr = ntohl(iph->daddr); + nfo.loc_port = ntohs(tcph->dest); + nfo.rem_addr = ntohl(iph->saddr); + nfo.rem_port = ntohs(tcph->source); + + nes_debug(NES_DBG_CM, "Received packet: dest=0x%08X:0x%04X src=0x%08X:0x%04X\n", + iph->daddr, tcph->dest, iph->saddr, tcph->source); + + /* note: this call is going to increment cm_node ref count */ + cm_node = find_node(cm_core, + nfo.rem_port, nfo.rem_addr, + nfo.loc_port, nfo.loc_addr); + + if (!cm_node) { + listener = find_listener(cm_core, nfo.loc_addr, nfo.loc_port, + NES_CM_LISTENER_ACTIVE_STATE); + if (listener) { + nfo.cm_id = listener->cm_id; + nfo.conn_type = listener->conn_type; + } else { + nfo.cm_id = NULL; + nfo.conn_type = 0; + } + + cm_node = make_cm_node(cm_core, nesvnic, &nfo, listener); + if (!cm_node) { + nes_debug(NES_DBG_CM, "Unable to allocate node\n"); + if (listener) { + nes_debug(NES_DBG_CM, "unable to allocate node and decrementing listener refcount\n"); + atomic_dec(&listener->ref_count); + } + ret = -1; + goto out; + } + if (!listener) { + nes_debug(NES_DBG_CM, "Packet found for unknown port %x refcnt=%d\n", + nfo.loc_port, atomic_read(&cm_node->ref_count)); + if (!tcph->rst) { + nes_debug(NES_DBG_CM, "Packet found for unknown port=%d" + " rem_port=%d refcnt=%d\n", + nfo.loc_port, nfo.rem_port, atomic_read(&cm_node->ref_count)); + + cm_node->tcp_cntxt.rcv_nxt = ntohl(tcph->seq); + cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); + send_reset(cm_node); + } + rem_ref_cm_node(cm_core, cm_node); + ret = -1; + goto out; + } + add_ref_cm_node(cm_node); + cm_node->state = NES_CM_STATE_LISTENING; + } + + nes_debug(NES_DBG_CM, "Processing Packet for node %p, data = (%p):\n", + cm_node, skb->data); + process_packet(cm_node, skb, cm_core); + + rem_ref_cm_node(cm_core, cm_node); + out: + if (skb) + dev_kfree_skb_any(skb); + return ret; +} + + +/** + * nes_cm_alloc_core - allocate a top level instance of a cm core + */ +struct nes_cm_core *nes_cm_alloc_core(void) +{ + int i; + + struct nes_cm_core *cm_core; + struct sk_buff *skb = NULL; + + /* setup the CM core */ + /* alloc top level core control structure */ + cm_core = kzalloc(sizeof(*cm_core), GFP_KERNEL); + if (!cm_core) + return NULL; + + INIT_LIST_HEAD(&cm_core->connected_nodes); + init_timer(&cm_core->tcp_timer); + cm_core->tcp_timer.function = nes_cm_timer_tick; + + cm_core->mtu = NES_CM_DEFAULT_MTU; + cm_core->state = NES_CM_STATE_INITED; + cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS; + + atomic_set(&cm_core->session_id, 0); + atomic_set(&cm_core->events_posted, 0); + + /* init the packet lists */ + skb_queue_head_init(&cm_core->tx_free_list); + + for (i = 0; i < NES_CM_DEFAULT_FRAME_CNT; i++) { + skb = dev_alloc_skb(cm_core->mtu); + if (!skb) { + kfree(cm_core); + return NULL; + } + /* add 'raw' skb to free frame list */ + skb_queue_head(&cm_core->tx_free_list, skb); + } + + cm_core->api = &nes_cm_api; + + spin_lock_init(&cm_core->ht_lock); + spin_lock_init(&cm_core->listen_list_lock); + + INIT_LIST_HEAD(&cm_core->listen_list.list); + + nes_debug(NES_DBG_CM, "Init CM Core completed -- cm_core=%p\n", cm_core); + + nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n"); + cm_core->event_wq = create_singlethread_workqueue("nesewq"); + cm_core->post_event = nes_cm_post_event; + nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n"); + cm_core->disconn_wq = create_singlethread_workqueue("nesdwq"); + + print_core(cm_core); + return cm_core; +} + + +/** + * mini_cm_dealloc_core - deallocate a top level instance of a cm core + */ +int mini_cm_dealloc_core(struct nes_cm_core *cm_core) +{ + nes_debug(NES_DBG_CM, "De-Alloc CM Core (%p)\n", cm_core); + + if (!cm_core) + return -EINVAL; + + barrier(); + + if (timer_pending(&cm_core->tcp_timer)) { + del_timer(&cm_core->tcp_timer); + } + + destroy_workqueue(cm_core->event_wq); + destroy_workqueue(cm_core->disconn_wq); + nes_debug(NES_DBG_CM, "\n"); + kfree(cm_core); + + return 0; +} + + +/** + * mini_cm_get + */ +int mini_cm_get(struct nes_cm_core *cm_core) +{ + return cm_core->state; +} + + +/** + * mini_cm_set + */ +int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value) +{ + int ret = 0; + + switch (type) { + case NES_CM_SET_PKT_SIZE: + cm_core->mtu = value; + break; + case NES_CM_SET_FREE_PKT_Q_SIZE: + cm_core->free_tx_pkt_max = value; + break; + default: + /* unknown set option */ + ret = -EINVAL; + } + + return ret; +} + + +/** + * nes_cm_init_tsa_conn setup HW; MPA frames must be + * successfully exchanged when this is called + */ +static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_node) +{ + int ret = 0; + + if (!nesqp) + return -EINVAL; + + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 | + NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG | + NES_QPCONTEXT_MISC_DROS); + + if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale) + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE); + + nesqp->nesqp_context->misc2 |= cpu_to_le32(64 << NES_QPCONTEXT_MISC2_TTL_SHIFT); + + nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16); + + nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32( + (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT); + + nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( + (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) & + NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK); + + nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( + (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) & + NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK); + + nesqp->nesqp_context->keepalive = cpu_to_le32(0x80); + nesqp->nesqp_context->ts_recent = 0; + nesqp->nesqp_context->ts_age = 0; + nesqp->nesqp_context->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); + nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd); + nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); + nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd << + cm_node->tcp_cntxt.rcv_wscale); + nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); + nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); + nesqp->nesqp_context->srtt = 0; + nesqp->nesqp_context->rttvar = cpu_to_le32(0x6); + nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000); + nesqp->nesqp_context->cwnd = cpu_to_le32(2*cm_node->tcp_cntxt.mss); + nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); + nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); + nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd); + + nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X," + " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n", + nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt), + le32_to_cpu(nesqp->nesqp_context->snd_nxt), + cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale), + le32_to_cpu(nesqp->nesqp_context->rcv_wnd), + le32_to_cpu(nesqp->nesqp_context->misc)); + nes_debug(NES_DBG_CM, " snd_wnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd)); + nes_debug(NES_DBG_CM, " snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd)); + nes_debug(NES_DBG_CM, " max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd)); + + nes_debug(NES_DBG_CM, "Change cm_node state to TSA\n"); + cm_node->state = NES_CM_STATE_TSA; + + return ret; +} + + +/** + * nes_cm_disconn + */ +int nes_cm_disconn(struct nes_qp *nesqp) +{ + unsigned long flags; + + spin_lock_irqsave(&nesqp->lock, flags); + if (nesqp->disconn_pending == 0) { + nesqp->disconn_pending++; + spin_unlock_irqrestore(&nesqp->lock, flags); + /* nes_add_ref(&nesqp->ibqp); */ + /* init our disconnect work element, to */ + INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker); + + queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work); + } else { + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_rem_ref(&nesqp->ibqp); + } + + return 0; +} + + +/** + * nes_disconnect_worker + */ +void nes_disconnect_worker(struct work_struct *work) +{ + struct nes_qp *nesqp = container_of(work, struct nes_qp, disconn_work); + + nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n", + nesqp->last_aeq, nesqp->hwqp.qp_id); + nes_cm_disconn_true(nesqp); +} + + +/** + * nes_cm_disconn_true + */ +int nes_cm_disconn_true(struct nes_qp *nesqp) +{ + unsigned long flags; + int ret = 0; + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + struct nes_vnic *nesvnic; + u16 last_ae; + u8 original_hw_tcp_state; + u8 original_ibqp_state; + u8 issued_disconnect_reset = 0; + + if (!nesqp) { + nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n"); + return -1; + } + + spin_lock_irqsave(&nesqp->lock, flags); + cm_id = nesqp->cm_id; + /* make sure we havent already closed this connection */ + if (!cm_id) { + nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n", + nesqp->hwqp.qp_id); + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_rem_ref(&nesqp->ibqp); + return -1; + } + + nesvnic = to_nesvnic(nesqp->ibqp.device); + nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id); + + original_hw_tcp_state = nesqp->hw_tcp_state; + original_ibqp_state = nesqp->ibqp_state; + last_ae = nesqp->last_aeq; + + + nes_debug(NES_DBG_CM, "set ibqp_state=%u\n", nesqp->ibqp_state); + + if ((nesqp->cm_id) && (cm_id->event_handler)) { + if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || + ((original_ibqp_state == IB_QPS_RTS) && + (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + atomic_inc(&cm_disconnects); + cm_event.event = IW_CM_EVENT_DISCONNECT; + if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) { + issued_disconnect_reset = 1; + cm_event.status = IW_CM_EVENT_STATUS_RESET; + nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event (status reset) for " + " QP%u, cm_id = %p. \n", + nesqp->hwqp.qp_id, cm_id); + } else { + cm_event.status = IW_CM_EVENT_STATUS_OK; + } + + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + + nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event for " + " QP%u, SQ Head = %u, SQ Tail = %u. cm_id = %p, refcount = %u.\n", + nesqp->hwqp.qp_id, + nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail, cm_id, + atomic_read(&nesqp->refcount)); + + spin_unlock_irqrestore(&nesqp->lock, flags); + ret = cm_id->event_handler(cm_id, &cm_event); + if (ret) + nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + spin_lock_irqsave(&nesqp->lock, flags); + } + + nesqp->disconn_pending = 0; + /* There might have been another AE while the lock was released */ + original_hw_tcp_state = nesqp->hw_tcp_state; + original_ibqp_state = nesqp->ibqp_state; + last_ae = nesqp->last_aeq; + + if ((issued_disconnect_reset == 0) && (nesqp->cm_id) && + ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) || + (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) || + (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) || + (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + atomic_inc(&cm_closes); + nesqp->cm_id = NULL; + nesqp->in_disconnect = 0; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_disconnect(nesqp, 1); + + cm_id->provider_data = nesqp; + /* Send up the close complete event */ + cm_event.event = IW_CM_EVENT_CLOSE; + cm_event.status = IW_CM_EVENT_STATUS_OK; + cm_event.provider_data = cm_id->provider_data; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + + ret = cm_id->event_handler(cm_id, &cm_event); + if (ret) { + nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + } + + cm_id->rem_ref(cm_id); + + spin_lock_irqsave(&nesqp->lock, flags); + if (nesqp->flush_issued == 0) { + nesqp->flush_issued = 1; + spin_unlock_irqrestore(&nesqp->lock, flags); + flush_wqes(nesvnic->nesdev, nesqp, NES_CQP_FLUSH_RQ, 1); + } else { + spin_unlock_irqrestore(&nesqp->lock, flags); + } + + /* This reference is from either ModifyQP or the AE processing, + there is still a race here with modifyqp */ + nes_rem_ref(&nesqp->ibqp); + + } else { + cm_id = nesqp->cm_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + /* check to see if the inbound reset beat the outbound reset */ + if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) { + nes_debug(NES_DBG_CM, "QP%u: Decing refcount due to inbound reset" + " beating the outbound reset.\n", + nesqp->hwqp.qp_id); + nes_rem_ref(&nesqp->ibqp); + } + } + } else { + nesqp->disconn_pending = 0; + spin_unlock_irqrestore(&nesqp->lock, flags); + } + nes_rem_ref(&nesqp->ibqp); + + return 0; +} + + +/** + * nes_disconnect + */ +int nes_disconnect(struct nes_qp *nesqp, int abrupt) +{ + int ret = 0; + struct nes_vnic *nesvnic; + struct nes_device *nesdev; + + nesvnic = to_nesvnic(nesqp->ibqp.device); + if (!nesvnic) + return -EINVAL; + + nesdev = nesvnic->nesdev; + + nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", + atomic_read(&nesvnic->netdev->refcnt)); + + if (nesqp->active_conn) { + + /* indicate this connection is NOT active */ + nesqp->active_conn = 0; + } else { + /* Need to free the Last Streaming Mode Message */ + if (nesqp->ietf_frame) { + pci_free_consistent(nesdev->pcidev, + nesqp->private_data_len+sizeof(struct ietf_mpa_frame), + nesqp->ietf_frame, nesqp->ietf_frame_pbase); + } + } + + /* close the CM node down if it is still active */ + if (nesqp->cm_node) { + nes_debug(NES_DBG_CM, "Call close API\n"); + + g_cm_core->api->close(g_cm_core, nesqp->cm_node); + nesqp->cm_node = NULL; + } + + return ret; +} + + +/** + * nes_accept + */ +int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + u64 u64temp; + struct ib_qp *ibqp; + struct nes_qp *nesqp; + struct nes_vnic *nesvnic; + struct nes_device *nesdev; + struct nes_cm_node *cm_node; + struct nes_adapter *adapter; + struct ib_qp_attr attr; + struct iw_cm_event cm_event; + struct nes_hw_qp_wqe *wqe; + struct nes_v4_quad nes_quad; + int ret; + + ibqp = nes_get_qp(cm_id->device, conn_param->qpn); + if (!ibqp) + return -EINVAL; + + /* get all our handles */ + nesqp = to_nesqp(ibqp); + nesvnic = to_nesvnic(nesqp->ibqp.device); + nesdev = nesvnic->nesdev; + adapter = nesdev->nesadapter; + + nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", + nesvnic, nesvnic->netdev, nesvnic->netdev->name); + + /* since this is from a listen, we were able to put node handle into cm_id */ + cm_node = (struct nes_cm_node *)cm_id->provider_data; + + /* associate the node with the QP */ + nesqp->cm_node = (void *)cm_node; + + nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu\n", + nesqp->hwqp.qp_id, cm_node, jiffies); + atomic_inc(&cm_accepts); + + nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", + atomic_read(&nesvnic->netdev->refcnt)); + + /* allocate the ietf frame and space for private data */ + nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, + sizeof(struct ietf_mpa_frame) + conn_param->private_data_len, + &nesqp->ietf_frame_pbase); + + if (!nesqp->ietf_frame) { + nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n"); + return -ENOMEM; + } + + + /* setup the MPA frame */ + nesqp->private_data_len = conn_param->private_data_len; + memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); + + memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, + conn_param->private_data_len); + + nesqp->ietf_frame->priv_data_len = cpu_to_be16(conn_param->private_data_len); + nesqp->ietf_frame->rev = mpa_version; + nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; + + /* setup our first outgoing iWarp send WQE (the IETF frame response) */ + wqe = &nesqp->hwqp.sq_vbase[0]; + + if (cm_id->remote_addr.sin_addr.s_addr != cm_id->local_addr.sin_addr.s_addr) { + u64temp = (unsigned long)nesqp; + u64temp |= NES_SW_CONTEXT_ALIGN>>1; + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, + u64temp); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | NES_IWARP_SQ_WQE_WRPDU); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = + cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)nesqp->ietf_frame_pbase); + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = + cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32)); + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = + cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( + NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | NES_QPCONTEXT_ORDIRD_WRPDU); + } else { + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM)); + } + nesqp->skip_lsmm = 1; + + + /* Cache the cm_id in the qp */ + nesqp->cm_id = cm_id; + cm_node->cm_id = cm_id; + + /* nesqp->cm_node = (void *)cm_id->provider_data; */ + cm_id->provider_data = nesqp; + nesqp->active_conn = 0; + + nes_cm_init_tsa_conn(nesqp, cm_node); + + nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); + nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); + nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + + nesqp->nesqp_context->misc2 |= cpu_to_le32( + (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + + nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( + nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), NULL, + NES_ARP_RESOLVE) << 16); + + nesqp->nesqp_context->ts_val_delta = cpu_to_le32( + jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); + + nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); + + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( + ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); + + memset(&nes_quad, 0, sizeof(nes_quad)); + nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; + nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; + + /* Produce hash key */ + nesqp->hte_index = cpu_to_be32( + crc32c(~0, (void *)&nes_quad, sizeof(nes_quad)) ^ 0xffffffff); + nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", + nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); + + nesqp->hte_index &= adapter->hte_index_mask; + nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); + + cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); + + nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X," + " rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + private data length=%zu.\n", + nesqp->hwqp.qp_id, + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohl(cm_id->local_addr.sin_addr.s_addr), + ntohs(cm_id->local_addr.sin_port), + le32_to_cpu(nesqp->nesqp_context->rcv_nxt), + le32_to_cpu(nesqp->nesqp_context->snd_nxt), + conn_param->private_data_len+sizeof(struct ietf_mpa_frame)); + + attr.qp_state = IB_QPS_RTS; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); + + /* notify OF layer that accept event was successfull */ + cm_id->add_ref(cm_id); + + cm_event.event = IW_CM_EVENT_ESTABLISHED; + cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED; + cm_event.provider_data = (void *)nesqp; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + ret = cm_id->event_handler(cm_id, &cm_event); + if (cm_node->loopbackpartner) { + cm_node->loopbackpartner->mpa_frame_size = nesqp->private_data_len; + /* copy entire MPA frame to our cm_node's frame */ + memcpy(cm_node->loopbackpartner->mpa_frame_buf, nesqp->ietf_frame->priv_data, + nesqp->private_data_len); + create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED); + } + if (ret) + printk("%s[%u] OFA CM event_handler returned, ret=%d\n", + __FUNCTION__, __LINE__, ret); + + return 0; +} + + +/** + * nes_reject + */ +int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + struct nes_cm_node *cm_node; + struct nes_cm_core *cm_core; + + atomic_inc(&cm_rejects); + cm_node = (struct nes_cm_node *) cm_id->provider_data; + cm_core = cm_node->cm_core; + cm_node->mpa_frame_size = sizeof(struct ietf_mpa_frame) + pdata_len; + + strcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP); + memcpy(&cm_node->mpa_frame.priv_data, pdata, pdata_len); + + cm_node->mpa_frame.priv_data_len = cpu_to_be16(pdata_len); + cm_node->mpa_frame.rev = mpa_version; + cm_node->mpa_frame.flags = IETF_MPA_FLAGS_CRC | IETF_MPA_FLAGS_REJECT; + + cm_core->api->reject(cm_core, &cm_node->mpa_frame, cm_node); + + return 0; +} + + +/** + * nes_connect + * setup and launch cm connect node + */ +int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + struct ib_qp *ibqp; + struct nes_qp *nesqp; + struct nes_vnic *nesvnic; + struct nes_device *nesdev; + struct nes_cm_node *cm_node; + struct nes_cm_info cm_info; + + ibqp = nes_get_qp(cm_id->device, conn_param->qpn); + if (!ibqp) + return -EINVAL; + nesqp = to_nesqp(ibqp); + if (!nesqp) + return -EINVAL; + nesvnic = to_nesvnic(nesqp->ibqp.device); + if (!nesvnic) + return -EINVAL; + nesdev = nesvnic->nesdev; + if (!nesdev) + return -EINVAL; + + atomic_inc(&cm_connects); + + nesqp->ietf_frame = kzalloc(sizeof(struct ietf_mpa_frame) + + conn_param->private_data_len, GFP_KERNEL); + if (!nesqp->ietf_frame) + return -ENOMEM; + + /* set qp as having an active connection */ + nesqp->active_conn = 1; + + nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", + nesqp->hwqp.qp_id, + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohl(cm_id->local_addr.sin_addr.s_addr), + ntohs(cm_id->local_addr.sin_port)); + + /* cache the cm_id in the qp */ + nesqp->cm_id = cm_id; + + cm_id->provider_data = nesqp; + + /* copy the private data */ + if (conn_param->private_data_len) { + memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, + conn_param->private_data_len); + } + + nesqp->private_data_len = conn_param->private_data_len; + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); + nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); + nes_debug(NES_DBG_CM, "mpa private data len =%u\n", conn_param->private_data_len); + + strcpy(&nesqp->ietf_frame->key[0], IEFT_MPA_KEY_REQ); + nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; + nesqp->ietf_frame->rev = IETF_MPA_VERSION; + nesqp->ietf_frame->priv_data_len = htons(conn_param->private_data_len); + + if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) + nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + + /* set up the connection params for the node */ + cm_info.loc_addr = (cm_id->local_addr.sin_addr.s_addr); + cm_info.loc_port = (cm_id->local_addr.sin_port); + cm_info.rem_addr = (cm_id->remote_addr.sin_addr.s_addr); + cm_info.rem_port = (cm_id->remote_addr.sin_port); + cm_info.cm_id = cm_id; + cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; + + cm_id->add_ref(cm_id); + nes_add_ref(&nesqp->ibqp); + + /* create a connect CM node connection */ + cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, nesqp->ietf_frame, &cm_info); + if (!cm_node) { + if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) + nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); + nes_rem_ref(&nesqp->ibqp); + kfree(nesqp->ietf_frame); + nesqp->ietf_frame = NULL; + cm_id->rem_ref(cm_id); + return -ENOMEM; + } + + cm_node->apbvt_set = 1; + nesqp->cm_node = cm_node; + + return 0; +} + + +/** + * nes_create_listen + */ +int nes_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + struct nes_vnic *nesvnic; + struct nes_cm_listener *cm_node; + struct nes_cm_info cm_info; + struct nes_adapter *adapter; + int err; + + + nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n", + cm_id, ntohs(cm_id->local_addr.sin_port)); + + nesvnic = to_nesvnic(cm_id->device); + if (!nesvnic) + return -EINVAL; + adapter = nesvnic->nesdev->nesadapter; + nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", + nesvnic, nesvnic->netdev, nesvnic->netdev->name); + + nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n", + nesvnic->local_ipaddr, cm_id->local_addr.sin_addr.s_addr); + + /* setup listen params in our api call struct */ + cm_info.loc_addr = nesvnic->local_ipaddr; + cm_info.loc_port = cm_id->local_addr.sin_port; + cm_info.backlog = backlog; + cm_info.cm_id = cm_id; + + cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; + + + cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); + if (!cm_node) { + printk("%s[%u] Error returned from listen API call\n", + __FUNCTION__, __LINE__); + return -ENOMEM; + } + + cm_id->provider_data = cm_node; + + if (!cm_node->reused_node) { + err = nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + if (err) { + printk("nes_manage_apbvt call returned %d.\n", err); + g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); + return err; + } + cm_listens_created++; + } + + cm_id->add_ref(cm_id); + cm_id->provider_data = (void *)cm_node; + + + return 0; +} + + +/** + * nes_destroy_listen + */ +int nes_destroy_listen(struct iw_cm_id *cm_id) +{ + if (cm_id->provider_data) + g_cm_core->api->stop_listener(g_cm_core, cm_id->provider_data); + else + nes_debug(NES_DBG_CM, "cm_id->provider_data was NULL\n"); + + cm_id->rem_ref(cm_id); + + return 0; +} + + +/** + * nes_cm_recv + */ +int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice) +{ + cm_packets_received++; + if ((g_cm_core) && (g_cm_core->api)) { + g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb); + } else { + nes_debug(NES_DBG_CM, "Unable to process packet for CM," + " cm is not setup properly.\n"); + } + + return 0; +} + + +/** + * nes_cm_start + * Start and init a cm core module + */ +int nes_cm_start(void) +{ + nes_debug(NES_DBG_CM, "\n"); + /* create the primary CM core, pass this handle to subsequent core inits */ + g_cm_core = nes_cm_alloc_core(); + if (g_cm_core) { + return 0; + } else { + return -ENOMEM; + } +} + + +/** + * nes_cm_stop + * stop and dealloc all cm core instances + */ +int nes_cm_stop(void) +{ + g_cm_core->api->destroy_cm_core(g_cm_core); + return 0; +} + + +/** + * cm_event_connected + * handle a connected event, setup QPs and HW + */ +void cm_event_connected(struct nes_cm_event *event) +{ + u64 u64temp; + struct nes_qp *nesqp; + struct nes_vnic *nesvnic; + struct nes_device *nesdev; + struct nes_cm_node *cm_node; + struct nes_adapter *nesadapter; + struct ib_qp_attr attr; + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + struct nes_hw_qp_wqe *wqe; + struct nes_v4_quad nes_quad; + int ret; + + /* get all our handles */ + cm_node = event->cm_node; + cm_id = cm_node->cm_id; + nes_debug(NES_DBG_CM, "cm_event_connected - %p - cm_id = %p\n", cm_node, cm_id); + nesqp = (struct nes_qp *)cm_id->provider_data; + nesvnic = to_nesvnic(nesqp->ibqp.device); + nesdev = nesvnic->nesdev; + nesadapter = nesdev->nesadapter; + + if (nesqp->destroyed) { + return; + } + atomic_inc(&cm_connecteds); + nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on" + " local port 0x%04X. jiffies = %lu.\n", + nesqp->hwqp.qp_id, + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohs(cm_id->local_addr.sin_port), + jiffies); + + nes_cm_init_tsa_conn(nesqp, cm_node); + + /* set the QP tsa context */ + nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); + nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); + nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + + nesqp->nesqp_context->misc2 |= cpu_to_le32( + (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( + nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), + NULL, NES_ARP_RESOLVE) << 16); + nesqp->nesqp_context->ts_val_delta = cpu_to_le32( + jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); + nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); + + /* Adjust tail for not having a LSMM */ + nesqp->hwqp.sq_tail = 1; + +#if defined(NES_SEND_FIRST_WRITE) + if (cm_node->send_write0) { + nes_debug(NES_DBG_CM, "Sending first write.\n"); + wqe = &nesqp->hwqp.sq_vbase[0]; + u64temp = (unsigned long)nesqp; + u64temp |= NES_SW_CONTEXT_ALIGN>>1; + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, + u64temp); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + + /* use the reserved spot on the WQ for the extra first WQE */ + nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM)); + nesqp->skip_lsmm = 1; + nesqp->hwqp.sq_tail = 0; + nes_write32(nesdev->regs + NES_WQE_ALLOC, + (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); + } +#endif + + memset(&nes_quad, 0, sizeof(nes_quad)); + + nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; + nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; + + /* Produce hash key */ + nesqp->hte_index = cpu_to_be32( + crc32c(~0, (void *)&nes_quad, sizeof(nes_quad)) ^ 0xffffffff); + nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n", + nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); + + nesqp->hte_index &= nesadapter->hte_index_mask; + nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); + + nesqp->ietf_frame = &cm_node->mpa_frame; + nesqp->private_data_len = (u8) cm_node->mpa_frame_size; + cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); + + /* modify QP state to rts */ + attr.qp_state = IB_QPS_RTS; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); + + /* notify OF layer we successfully created the requested connection */ + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED; + cm_event.provider_data = cm_id->provider_data; + cm_event.local_addr.sin_family = AF_INET; + cm_event.local_addr.sin_port = cm_id->local_addr.sin_port; + cm_event.remote_addr = cm_id->remote_addr; + + cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size; + + cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr; + ret = cm_id->event_handler(cm_id, &cm_event); + nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + + if (ret) + printk("%s[%u] OFA CM event_handler returned, ret=%d\n", + __FUNCTION__, __LINE__, ret); + nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = %lu\n", + nesqp->hwqp.qp_id, jiffies ); + + nes_rem_ref(&nesqp->ibqp); + + return; +} + + +/** + * cm_event_connect_error + */ +void cm_event_connect_error(struct nes_cm_event *event) +{ + struct nes_qp *nesqp; + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + /* struct nes_cm_info cm_info; */ + int ret; + + if (!event->cm_node) + return; + + cm_id = event->cm_node->cm_id; + if (!cm_id) { + return; + } + + nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id); + nesqp = cm_id->provider_data; + + if (!nesqp) { + return; + } + + /* notify OF layer about this connection error event */ + /* cm_id->rem_ref(cm_id); */ + nesqp->cm_id = NULL; + cm_id->provider_data = NULL; + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.status = IW_CM_EVENT_STATUS_REJECTED; + cm_event.provider_data = cm_id->provider_data; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + + nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remove_addr=%08x\n", + cm_event.local_addr.sin_addr.s_addr, cm_event.remote_addr.sin_addr.s_addr); + + ret = cm_id->event_handler(cm_id, &cm_event); + nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + if (ret) + printk("%s[%u] OFA CM event_handler returned, ret=%d\n", + __FUNCTION__, __LINE__, ret); + nes_rem_ref(&nesqp->ibqp); + cm_id->rem_ref(cm_id); + + return; +} + + +/** + * cm_event_reset + */ +void cm_event_reset(struct nes_cm_event *event) +{ + struct nes_qp *nesqp; + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + /* struct nes_cm_info cm_info; */ + int ret; + + if (!event->cm_node) + return; + + if (!event->cm_node->cm_id) + return; + + cm_id = event->cm_node->cm_id; + + nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id); + nesqp = cm_id->provider_data; + + nesqp->cm_id = NULL; + /* cm_id->provider_data = NULL; */ + cm_event.event = IW_CM_EVENT_DISCONNECT; + cm_event.status = IW_CM_EVENT_STATUS_RESET; + cm_event.provider_data = cm_id->provider_data; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + + ret = cm_id->event_handler(cm_id, &cm_event); + nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + + + /* notify OF layer about this connection error event */ + cm_id->rem_ref(cm_id); + + return; +} + + +/** + * cm_event_mpa_req + */ +void cm_event_mpa_req(struct nes_cm_event *event) +{ + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + int ret; + struct nes_cm_node *cm_node; + + cm_node = event->cm_node; + if (!cm_node) + return; + cm_id = cm_node->cm_id; + + atomic_inc(&cm_connect_reqs); + nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", + cm_node, cm_id, jiffies); + + cm_event.event = IW_CM_EVENT_CONNECT_REQUEST; + cm_event.status = IW_CM_EVENT_STATUS_OK; + cm_event.provider_data = (void *)cm_node; + + cm_event.local_addr.sin_family = AF_INET; + cm_event.local_addr.sin_port = htons(event->cm_info.loc_port); + cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr); + + cm_event.remote_addr.sin_family = AF_INET; + cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port); + cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr); + + cm_event.private_data = cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8) cm_node->mpa_frame_size; + + ret = cm_id->event_handler(cm_id, &cm_event); + if (ret) + printk("%s[%u] OFA CM event_handler returned, ret=%d\n", + __FUNCTION__, __LINE__, ret); + + return; +} + + +static void nes_cm_event_handler(struct work_struct *); + +/** + * nes_cm_post_event + * post an event to the cm event handler + */ +int nes_cm_post_event(struct nes_cm_event *event) +{ + atomic_inc(&event->cm_node->cm_core->events_posted); + add_ref_cm_node(event->cm_node); + event->cm_info.cm_id->add_ref(event->cm_info.cm_id); + INIT_WORK(&event->event_work, nes_cm_event_handler); + nes_debug(NES_DBG_CM, "queue_work, event=%p\n", event); + + queue_work(event->cm_node->cm_core->event_wq, &event->event_work); + + nes_debug(NES_DBG_CM, "Exit\n"); + return 0; +} + + +/** + * nes_cm_event_handler + * worker function to handle cm events + * will free instance of nes_cm_event + */ +static void nes_cm_event_handler(struct work_struct *work) +{ + struct nes_cm_event *event = container_of(work, struct nes_cm_event, event_work); + struct nes_cm_core *cm_core; + + if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) { + return; + } + cm_core = event->cm_node->cm_core; + nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n", + event, event->type, atomic_read(&cm_core->events_posted)); + + switch (event->type) { + case NES_CM_EVENT_MPA_REQ: + cm_event_mpa_req(event); + nes_debug(NES_DBG_CM, "CM Event: MPA REQUEST\n"); + break; + case NES_CM_EVENT_RESET: + nes_debug(NES_DBG_CM, "CM Event: RESET\n"); + cm_event_reset(event); + break; + case NES_CM_EVENT_CONNECTED: + if ((!event->cm_node->cm_id) || + (event->cm_node->state != NES_CM_STATE_TSA)) { + break; + } + cm_event_connected(event); + nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); + break; + case NES_CM_EVENT_ABORTED: + if ((!event->cm_node->cm_id) || (event->cm_node->state == NES_CM_STATE_TSA)) { + break; + } + cm_event_connect_error(event); + nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); + break; + case NES_CM_EVENT_DROPPED_PKT: + nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n"); + break; + default: + nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n"); + break; + } + + atomic_dec(&cm_core->events_posted); + event->cm_info.cm_id->rem_ref(event->cm_info.cm_id); + rem_ref_cm_node(cm_core, event->cm_node); + kfree(event); + + return; +} diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h new file mode 100644 index 000000000000..a59f0a7fb278 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -0,0 +1,433 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef NES_CM_H +#define NES_CM_H + +#define QUEUE_EVENTS + +#define NES_MANAGE_APBVT_DEL 0 +#define NES_MANAGE_APBVT_ADD 1 + +/* IETF MPA -- defines, enums, structs */ +#define IEFT_MPA_KEY_REQ "MPA ID Req Frame" +#define IEFT_MPA_KEY_REP "MPA ID Rep Frame" +#define IETF_MPA_KEY_SIZE 16 +#define IETF_MPA_VERSION 1 + +enum ietf_mpa_flags { + IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ + IETF_MPA_FLAGS_CRC = 0x40, /* receive Markers */ + IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */ +}; + +struct ietf_mpa_frame { + u8 key[IETF_MPA_KEY_SIZE]; + u8 flags; + u8 rev; + __be16 priv_data_len; + u8 priv_data[0]; +}; + +#define ietf_mpa_req_resp_frame ietf_mpa_frame + +struct nes_v4_quad { + u32 rsvd0; + __le32 DstIpAdrIndex; /* Only most significant 5 bits are valid */ + __be32 SrcIpadr; + __be16 TcpPorts[2]; /* src is low, dest is high */ +}; + +struct nes_cm_node; +enum nes_timer_type { + NES_TIMER_TYPE_SEND, + NES_TIMER_TYPE_RECV, + NES_TIMER_NODE_CLEANUP, + NES_TIMER_TYPE_CLOSE, +}; + +#define MAX_NES_IFS 4 + +#define SET_ACK 1 +#define SET_SYN 2 +#define SET_FIN 4 +#define SET_RST 8 + +struct option_base { + u8 optionnum; + u8 length; +}; + +enum option_numbers { + OPTION_NUMBER_END, + OPTION_NUMBER_NONE, + OPTION_NUMBER_MSS, + OPTION_NUMBER_WINDOW_SCALE, + OPTION_NUMBER_SACK_PERM, + OPTION_NUMBER_SACK, + OPTION_NUMBER_WRITE0 = 0xbc +}; + +struct option_mss { + u8 optionnum; + u8 length; + __be16 mss; +}; + +struct option_windowscale { + u8 optionnum; + u8 length; + u8 shiftcount; +}; + +union all_known_options { + char as_end; + struct option_base as_base; + struct option_mss as_mss; + struct option_windowscale as_windowscale; +}; + +struct nes_timer_entry { + struct list_head list; + unsigned long timetosend; /* jiffies */ + struct sk_buff *skb; + u32 type; + u32 retrycount; + u32 retranscount; + u32 context; + u32 seq_num; + u32 send_retrans; + int close_when_complete; + struct net_device *netdev; +}; + +#define NES_DEFAULT_RETRYS 64 +#define NES_DEFAULT_RETRANS 8 +#ifdef CONFIG_INFINIBAND_NES_DEBUG +#define NES_RETRY_TIMEOUT (1000*HZ/1000) +#else +#define NES_RETRY_TIMEOUT (3000*HZ/1000) +#endif +#define NES_SHORT_TIME (10) +#define NES_LONG_TIME (2000*HZ/1000) + +#define NES_CM_HASHTABLE_SIZE 1024 +#define NES_CM_TCP_TIMER_INTERVAL 3000 +#define NES_CM_DEFAULT_MTU 1540 +#define NES_CM_DEFAULT_FRAME_CNT 10 +#define NES_CM_THREAD_STACK_SIZE 256 +#define NES_CM_DEFAULT_RCV_WND 64240 // before we know that window scaling is allowed +#define NES_CM_DEFAULT_RCV_WND_SCALED 256960 // after we know that window scaling is allowed +#define NES_CM_DEFAULT_RCV_WND_SCALE 2 +#define NES_CM_DEFAULT_FREE_PKTS 0x000A +#define NES_CM_FREE_PKT_LO_WATERMARK 2 + +#define NES_CM_DEFAULT_MSS 536 + +#define NES_CM_DEF_SEQ 0x159bf75f +#define NES_CM_DEF_LOCAL_ID 0x3b47 + +#define NES_CM_DEF_SEQ2 0x18ed5740 +#define NES_CM_DEF_LOCAL_ID2 0xb807 + +typedef u32 nes_addr_t; + +#define nes_cm_tsa_context nes_qp_context + +struct nes_qp; + +/* cm node transition states */ +enum nes_cm_node_state { + NES_CM_STATE_UNKNOWN, + NES_CM_STATE_INITED, + NES_CM_STATE_LISTENING, + NES_CM_STATE_SYN_RCVD, + NES_CM_STATE_SYN_SENT, + NES_CM_STATE_ONE_SIDE_ESTABLISHED, + NES_CM_STATE_ESTABLISHED, + NES_CM_STATE_ACCEPTING, + NES_CM_STATE_MPAREQ_SENT, + NES_CM_STATE_TSA, + NES_CM_STATE_FIN_WAIT1, + NES_CM_STATE_FIN_WAIT2, + NES_CM_STATE_CLOSE_WAIT, + NES_CM_STATE_TIME_WAIT, + NES_CM_STATE_LAST_ACK, + NES_CM_STATE_CLOSING, + NES_CM_STATE_CLOSED +}; + +/* type of nes connection */ +enum nes_cm_conn_type { + NES_CM_IWARP_CONN_TYPE, +}; + +/* CM context params */ +struct nes_cm_tcp_context { + u8 client; + + u32 loc_seq_num; + u32 loc_ack_num; + u32 rem_ack_num; + u32 rcv_nxt; + + u32 loc_id; + u32 rem_id; + + u32 snd_wnd; + u32 max_snd_wnd; + + u32 rcv_wnd; + u32 mss; + u8 snd_wscale; + u8 rcv_wscale; + + struct nes_cm_tsa_context tsa_cntxt; + struct timeval sent_ts; +}; + + +enum nes_cm_listener_state { + NES_CM_LISTENER_PASSIVE_STATE=1, + NES_CM_LISTENER_ACTIVE_STATE=2, + NES_CM_LISTENER_EITHER_STATE=3 +}; + +struct nes_cm_listener { + struct list_head list; + u64 session_id; + struct nes_cm_core *cm_core; + u8 loc_mac[ETH_ALEN]; + nes_addr_t loc_addr; + u16 loc_port; + struct iw_cm_id *cm_id; + enum nes_cm_conn_type conn_type; + atomic_t ref_count; + struct nes_vnic *nesvnic; + atomic_t pend_accepts_cnt; + int backlog; + enum nes_cm_listener_state listener_state; + u32 reused_node; +}; + +/* per connection node and node state information */ +struct nes_cm_node { + u64 session_id; + u32 hashkey; + + nes_addr_t loc_addr, rem_addr; + u16 loc_port, rem_port; + + u8 loc_mac[ETH_ALEN]; + u8 rem_mac[ETH_ALEN]; + + enum nes_cm_node_state state; + struct nes_cm_tcp_context tcp_cntxt; + struct nes_cm_core *cm_core; + struct sk_buff_head resend_list; + atomic_t ref_count; + struct net_device *netdev; + + struct nes_cm_node *loopbackpartner; + struct list_head retrans_list; + spinlock_t retrans_list_lock; + struct list_head recv_list; + spinlock_t recv_list_lock; + + int send_write0; + union { + struct ietf_mpa_frame mpa_frame; + u8 mpa_frame_buf[NES_CM_DEFAULT_MTU]; + }; + u16 mpa_frame_size; + struct iw_cm_id *cm_id; + struct list_head list; + int accelerated; + struct nes_cm_listener *listener; + enum nes_cm_conn_type conn_type; + struct nes_vnic *nesvnic; + int apbvt_set; + int accept_pend; +}; + +/* structure for client or CM to fill when making CM api calls. */ +/* - only need to set relevant data, based on op. */ +struct nes_cm_info { + union { + struct iw_cm_id *cm_id; + struct net_device *netdev; + }; + + u16 loc_port; + u16 rem_port; + nes_addr_t loc_addr; + nes_addr_t rem_addr; + + enum nes_cm_conn_type conn_type; + int backlog; +}; + +/* CM event codes */ +enum nes_cm_event_type { + NES_CM_EVENT_UNKNOWN, + NES_CM_EVENT_ESTABLISHED, + NES_CM_EVENT_MPA_REQ, + NES_CM_EVENT_MPA_CONNECT, + NES_CM_EVENT_MPA_ACCEPT, + NES_CM_EVENT_MPA_ESTABLISHED, + NES_CM_EVENT_CONNECTED, + NES_CM_EVENT_CLOSED, + NES_CM_EVENT_RESET, + NES_CM_EVENT_DROPPED_PKT, + NES_CM_EVENT_CLOSE_IMMED, + NES_CM_EVENT_CLOSE_HARD, + NES_CM_EVENT_CLOSE_CLEAN, + NES_CM_EVENT_ABORTED, + NES_CM_EVENT_SEND_FIRST +}; + +/* event to post to CM event handler */ +struct nes_cm_event { + enum nes_cm_event_type type; + + struct nes_cm_info cm_info; + struct work_struct event_work; + struct nes_cm_node *cm_node; +}; + +struct nes_cm_core { + enum nes_cm_node_state state; + atomic_t session_id; + + atomic_t listen_node_cnt; + struct nes_cm_node listen_list; + spinlock_t listen_list_lock; + + u32 mtu; + u32 free_tx_pkt_max; + u32 rx_pkt_posted; + struct sk_buff_head tx_free_list; + atomic_t ht_node_cnt; + struct list_head connected_nodes; + /* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */ + spinlock_t ht_lock; + + struct timer_list tcp_timer; + + struct nes_cm_ops *api; + + int (*post_event)(struct nes_cm_event *event); + atomic_t events_posted; + struct workqueue_struct *event_wq; + struct workqueue_struct *disconn_wq; + + atomic_t node_cnt; + u64 aborted_connects; + u32 options; + + struct nes_cm_node *current_listen_node; +}; + + +#define NES_CM_SET_PKT_SIZE (1 << 1) +#define NES_CM_SET_FREE_PKT_Q_SIZE (1 << 2) + +/* CM ops/API for client interface */ +struct nes_cm_ops { + int (*accelerated)(struct nes_cm_core *, struct nes_cm_node *); + struct nes_cm_listener * (*listen)(struct nes_cm_core *, struct nes_vnic *, + struct nes_cm_info *); + int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *); + struct nes_cm_node * (*connect)(struct nes_cm_core *, + struct nes_vnic *, struct ietf_mpa_frame *, + struct nes_cm_info *); + int (*close)(struct nes_cm_core *, struct nes_cm_node *); + int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *, + struct nes_cm_node *); + int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *, + struct nes_cm_node *); + int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, + struct sk_buff *); + int (*destroy_cm_core)(struct nes_cm_core *); + int (*get)(struct nes_cm_core *); + int (*set)(struct nes_cm_core *, u32, u32); +}; + + +int send_mpa_request(struct nes_cm_node *); +struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *, + void *, u32, void *, u32, u8); +int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *, + enum nes_timer_type, int, int); +void nes_cm_timer_tick(unsigned long); +int send_syn(struct nes_cm_node *, u32); +int send_reset(struct nes_cm_node *); +int send_ack(struct nes_cm_node *); +int send_fin(struct nes_cm_node *, struct sk_buff *); +struct sk_buff *get_free_pkt(struct nes_cm_node *); +int process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *); + +struct nes_cm_node * mini_cm_connect(struct nes_cm_core *, + struct nes_vnic *, struct ietf_mpa_frame *, struct nes_cm_info *); +int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *); +int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *); +int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); +int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); +struct nes_cm_core *mini_cm_alloc_core(struct nes_cm_info *); +int mini_cm_dealloc_core(struct nes_cm_core *); +int mini_cm_get(struct nes_cm_core *); +int mini_cm_set(struct nes_cm_core *, u32, u32); + +int nes_cm_disconn(struct nes_qp *); +void nes_disconnect_worker(struct work_struct *); +int nes_cm_disconn_true(struct nes_qp *); +int nes_disconnect(struct nes_qp *, int); + +int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *); +int nes_reject(struct iw_cm_id *, const void *, u8); +int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *); +int nes_create_listen(struct iw_cm_id *, int); +int nes_destroy_listen(struct iw_cm_id *); + +int nes_cm_recv(struct sk_buff *, struct net_device *); +int nes_cm_start(void); +int nes_cm_stop(void); + +/* CM event handler functions */ +void cm_event_connected(struct nes_cm_event *); +void cm_event_connect_error(struct nes_cm_event *); +void cm_event_reset(struct nes_cm_event *); +void cm_event_mpa_req(struct nes_cm_event *); +int nes_cm_post_event(struct nes_cm_event *); + +#endif /* NES_CM_H */ diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h new file mode 100644 index 000000000000..da9daba8e668 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_context.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef NES_CONTEXT_H +#define NES_CONTEXT_H + +struct nes_qp_context { + __le32 misc; + __le32 cqs; + __le32 sq_addr_low; + __le32 sq_addr_high; + __le32 rq_addr_low; + __le32 rq_addr_high; + __le32 misc2; + __le16 tcpPorts[2]; + __le32 ip0; + __le32 ip1; + __le32 ip2; + __le32 ip3; + __le32 mss; + __le32 arp_index_vlan; + __le32 tcp_state_flow_label; + __le32 pd_index_wscale; + __le32 keepalive; + u32 ts_recent; + u32 ts_age; + __le32 snd_nxt; + __le32 snd_wnd; + __le32 rcv_nxt; + __le32 rcv_wnd; + __le32 snd_max; + __le32 snd_una; + u32 srtt; + __le32 rttvar; + __le32 ssthresh; + __le32 cwnd; + __le32 snd_wl1; + __le32 snd_wl2; + __le32 max_snd_wnd; + __le32 ts_val_delta; + u32 retransmit; + u32 probe_cnt; + u32 hte_index; + __le32 q2_addr_low; + __le32 q2_addr_high; + __le32 ird_index; + u32 Rsvd3; + __le32 ird_ord_sizes; + u32 mrkr_offset; + __le32 aeq_token_low; + __le32 aeq_token_high; +}; + +/* QP Context Misc Field */ + +#define NES_QPCONTEXT_MISC_IWARP_VER_MASK 0x00000003 +#define NES_QPCONTEXT_MISC_IWARP_VER_SHIFT 0 +#define NES_QPCONTEXT_MISC_EFB_SIZE_MASK 0x000000C0 +#define NES_QPCONTEXT_MISC_EFB_SIZE_SHIFT 6 +#define NES_QPCONTEXT_MISC_RQ_SIZE_MASK 0x00000300 +#define NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT 8 +#define NES_QPCONTEXT_MISC_SQ_SIZE_MASK 0x00000c00 +#define NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT 10 +#define NES_QPCONTEXT_MISC_PCI_FCN_MASK 0x00007000 +#define NES_QPCONTEXT_MISC_PCI_FCN_SHIFT 12 +#define NES_QPCONTEXT_MISC_DUP_ACKS_MASK 0x00070000 +#define NES_QPCONTEXT_MISC_DUP_ACKS_SHIFT 16 + +enum nes_qp_context_misc_bits { + NES_QPCONTEXT_MISC_RX_WQE_SIZE = 0x00000004, + NES_QPCONTEXT_MISC_IPV4 = 0x00000008, + NES_QPCONTEXT_MISC_DO_NOT_FRAG = 0x00000010, + NES_QPCONTEXT_MISC_INSERT_VLAN = 0x00000020, + NES_QPCONTEXT_MISC_DROS = 0x00008000, + NES_QPCONTEXT_MISC_WSCALE = 0x00080000, + NES_QPCONTEXT_MISC_KEEPALIVE = 0x00100000, + NES_QPCONTEXT_MISC_TIMESTAMP = 0x00200000, + NES_QPCONTEXT_MISC_SACK = 0x00400000, + NES_QPCONTEXT_MISC_RDMA_WRITE_EN = 0x00800000, + NES_QPCONTEXT_MISC_RDMA_READ_EN = 0x01000000, + NES_QPCONTEXT_MISC_WBIND_EN = 0x10000000, + NES_QPCONTEXT_MISC_FAST_REGISTER_EN = 0x20000000, + NES_QPCONTEXT_MISC_PRIV_EN = 0x40000000, + NES_QPCONTEXT_MISC_NO_NAGLE = 0x80000000 +}; + +enum nes_qp_acc_wq_sizes { + HCONTEXT_TSA_WQ_SIZE_4 = 0, + HCONTEXT_TSA_WQ_SIZE_32 = 1, + HCONTEXT_TSA_WQ_SIZE_128 = 2, + HCONTEXT_TSA_WQ_SIZE_512 = 3 +}; + +/* QP Context Misc2 Fields */ +#define NES_QPCONTEXT_MISC2_TTL_MASK 0x000000ff +#define NES_QPCONTEXT_MISC2_TTL_SHIFT 0 +#define NES_QPCONTEXT_MISC2_HOP_LIMIT_MASK 0x000000ff +#define NES_QPCONTEXT_MISC2_HOP_LIMIT_SHIFT 0 +#define NES_QPCONTEXT_MISC2_LIMIT_MASK 0x00000300 +#define NES_QPCONTEXT_MISC2_LIMIT_SHIFT 8 +#define NES_QPCONTEXT_MISC2_NIC_INDEX_MASK 0x0000fc00 +#define NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT 10 +#define NES_QPCONTEXT_MISC2_SRC_IP_MASK 0x001f0000 +#define NES_QPCONTEXT_MISC2_SRC_IP_SHIFT 16 +#define NES_QPCONTEXT_MISC2_TOS_MASK 0xff000000 +#define NES_QPCONTEXT_MISC2_TOS_SHIFT 24 +#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_MASK 0xff000000 +#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_SHIFT 24 + +/* QP Context Tcp State/Flow Label Fields */ +#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_MASK 0x000fffff +#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_SHIFT 0 +#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_MASK 0xf0000000 +#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT 28 + +enum nes_qp_tcp_state { + NES_QPCONTEXT_TCPSTATE_CLOSED = 1, + NES_QPCONTEXT_TCPSTATE_EST = 5, + NES_QPCONTEXT_TCPSTATE_TIME_WAIT = 11, +}; + +/* QP Context PD Index/wscale Fields */ +#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK 0x0000000f +#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT 0 +#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK 0x00000f00 +#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT 8 +#define NES_QPCONTEXT_PDWSCALE_PDINDEX_MASK 0xffff0000 +#define NES_QPCONTEXT_PDWSCALE_PDINDEX_SHIFT 16 + +/* QP Context Keepalive Fields */ +#define NES_QPCONTEXT_KEEPALIVE_DELTA_MASK 0x0000ffff +#define NES_QPCONTEXT_KEEPALIVE_DELTA_SHIFT 0 +#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_MASK 0x00ff0000 +#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_SHIFT 16 +#define NES_QPCONTEXT_KEEPALIVE_INTV_MASK 0xff000000 +#define NES_QPCONTEXT_KEEPALIVE_INTV_SHIFT 24 + +/* QP Context ORD/IRD Fields */ +#define NES_QPCONTEXT_ORDIRD_ORDSIZE_MASK 0x0000007f +#define NES_QPCONTEXT_ORDIRD_ORDSIZE_SHIFT 0 +#define NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK 0x00030000 +#define NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT 16 +#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_MASK 0x30000000 +#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT 28 + +enum nes_ord_ird_bits { + NES_QPCONTEXT_ORDIRD_WRPDU = 0x02000000, + NES_QPCONTEXT_ORDIRD_LSMM_PRESENT = 0x04000000, + NES_QPCONTEXT_ORDIRD_ALSMM = 0x08000000, + NES_QPCONTEXT_ORDIRD_AAH = 0x40000000, + NES_QPCONTEXT_ORDIRD_RNMC = 0x80000000 +}; + +enum nes_iwarp_qp_state { + NES_QPCONTEXT_IWARP_STATE_NONEXIST = 0, + NES_QPCONTEXT_IWARP_STATE_IDLE = 1, + NES_QPCONTEXT_IWARP_STATE_RTS = 2, + NES_QPCONTEXT_IWARP_STATE_CLOSING = 3, + NES_QPCONTEXT_IWARP_STATE_TERMINATE = 5, + NES_QPCONTEXT_IWARP_STATE_ERROR = 6 +}; + + +#endif /* NES_CONTEXT_H */ diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c new file mode 100644 index 000000000000..7c4c0fbf0abd --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -0,0 +1,3080 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/if_vlan.h> + +#include "nes.h" + +u32 crit_err_count = 0; +u32 int_mod_timer_init; +u32 int_mod_cq_depth_256; +u32 int_mod_cq_depth_128; +u32 int_mod_cq_depth_32; +u32 int_mod_cq_depth_24; +u32 int_mod_cq_depth_16; +u32 int_mod_cq_depth_4; +u32 int_mod_cq_depth_1; + +#include "nes_cm.h" + + +#ifdef CONFIG_INFINIBAND_NES_DEBUG +static unsigned char *nes_iwarp_state_str[] = { + "Non-Existant", + "Idle", + "RTS", + "Closing", + "RSVD1", + "Terminate", + "Error", + "RSVD2", +}; + +static unsigned char *nes_tcp_state_str[] = { + "Non-Existant", + "Closed", + "Listen", + "SYN Sent", + "SYN Rcvd", + "Established", + "Close Wait", + "FIN Wait 1", + "Closing", + "Last Ack", + "FIN Wait 2", + "Time Wait", + "RSVD1", + "RSVD2", + "RSVD3", + "RSVD4", +}; +#endif + + +/** + * nes_nic_init_timer_defaults + */ +void nes_nic_init_timer_defaults(struct nes_device *nesdev, u8 jumbomode) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + + shared_timer->timer_in_use_min = NES_NIC_FAST_TIMER_LOW; + shared_timer->timer_in_use_max = NES_NIC_FAST_TIMER_HIGH; + if (jumbomode) { + shared_timer->threshold_low = DEFAULT_JUMBO_NES_QL_LOW; + shared_timer->threshold_target = DEFAULT_JUMBO_NES_QL_TARGET; + shared_timer->threshold_high = DEFAULT_JUMBO_NES_QL_HIGH; + } else { + shared_timer->threshold_low = DEFAULT_NES_QL_LOW; + shared_timer->threshold_target = DEFAULT_NES_QL_TARGET; + shared_timer->threshold_high = DEFAULT_NES_QL_HIGH; + } + + /* todo use netdev->mtu to set thresholds */ + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); +} + + +/** + * nes_nic_init_timer + */ +static void nes_nic_init_timer(struct nes_device *nesdev) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + + if (shared_timer->timer_in_use_old == 0) { + nesdev->deepcq_count = 0; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward = 0; + shared_timer->timer_in_use = NES_NIC_FAST_TIMER; + shared_timer->timer_in_use_old = 0; + + } + if (shared_timer->timer_in_use != shared_timer->timer_in_use_old) { + shared_timer->timer_in_use_old = shared_timer->timer_in_use; + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, + 0x80000000 | ((u32)(shared_timer->timer_in_use*8))); + } + /* todo use netdev->mtu to set thresholds */ + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); +} + + +/** + * nes_nic_tune_timer + */ +static void nes_nic_tune_timer(struct nes_device *nesdev) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + u16 cq_count = nesdev->currcq_count; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + + if (shared_timer->cq_count_old < cq_count) { + if (cq_count > shared_timer->threshold_low) + shared_timer->cq_direction_downward=0; + } + if (shared_timer->cq_count_old >= cq_count) + shared_timer->cq_direction_downward++; + shared_timer->cq_count_old = cq_count; + if (shared_timer->cq_direction_downward > NES_NIC_CQ_DOWNWARD_TREND) { + if (cq_count <= shared_timer->threshold_low) { + shared_timer->threshold_low = shared_timer->threshold_low/2; + shared_timer->cq_direction_downward=0; + nesdev->currcq_count = 0; + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); + return; + } + } + + if (cq_count > 1) { + nesdev->deepcq_count += cq_count; + if (cq_count <= shared_timer->threshold_low) { /* increase timer gently */ + shared_timer->timer_direction_upward++; + shared_timer->timer_direction_downward = 0; + } else if (cq_count <= shared_timer->threshold_target) { /* balanced */ + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward = 0; + } else if (cq_count <= shared_timer->threshold_high) { /* decrease timer gently */ + shared_timer->timer_direction_downward++; + shared_timer->timer_direction_upward = 0; + } else if (cq_count <= (shared_timer->threshold_high) * 2) { + shared_timer->timer_in_use -= 2; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward++; + } else { + shared_timer->timer_in_use -= 4; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward++; + } + + if (shared_timer->timer_direction_upward > 3 ) { /* using history */ + shared_timer->timer_in_use += 3; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward = 0; + } + if (shared_timer->timer_direction_downward > 5) { /* using history */ + shared_timer->timer_in_use -= 4 ; + shared_timer->timer_direction_downward = 0; + shared_timer->timer_direction_upward = 0; + } + } + + /* boundary checking */ + if (shared_timer->timer_in_use > NES_NIC_FAST_TIMER_HIGH) + shared_timer->timer_in_use = NES_NIC_FAST_TIMER_HIGH; + else if (shared_timer->timer_in_use < NES_NIC_FAST_TIMER_LOW) { + shared_timer->timer_in_use = NES_NIC_FAST_TIMER_LOW; + } + + nesdev->currcq_count = 0; + + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); +} + + +/** + * nes_init_adapter - initialize adapter + */ +struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { + struct nes_adapter *nesadapter = NULL; + unsigned long num_pds; + u32 u32temp; + u32 port_count; + u16 max_rq_wrs; + u16 max_sq_wrs; + u32 max_mr; + u32 max_256pbl; + u32 max_4kpbl; + u32 max_qp; + u32 max_irrq; + u32 max_cq; + u32 hte_index_mask; + u32 adapter_size; + u32 arp_table_size; + u16 vendor_id; + u8 OneG_Mode; + u8 func_index; + + /* search the list of existing adapters */ + list_for_each_entry(nesadapter, &nes_adapter_list, list) { + nes_debug(NES_DBG_INIT, "Searching Adapter list for PCI devfn = 0x%X," + " adapter PCI slot/bus = %u/%u, pci devices PCI slot/bus = %u/%u, .\n", + nesdev->pcidev->devfn, + PCI_SLOT(nesadapter->devfn), + nesadapter->bus_number, + PCI_SLOT(nesdev->pcidev->devfn), + nesdev->pcidev->bus->number ); + if ((PCI_SLOT(nesadapter->devfn) == PCI_SLOT(nesdev->pcidev->devfn)) && + (nesadapter->bus_number == nesdev->pcidev->bus->number)) { + nesadapter->ref_count++; + return nesadapter; + } + } + + /* no adapter found */ + num_pds = pci_resource_len(nesdev->pcidev, BAR_1) >> PAGE_SHIFT; + if ((hw_rev != NE020_REV) && (hw_rev != NE020_REV1)) { + nes_debug(NES_DBG_INIT, "NE020 driver detected unknown hardware revision 0x%x\n", + hw_rev); + return NULL; + } + + nes_debug(NES_DBG_INIT, "Determine Soft Reset, QP_control=0x%x, CPU0=0x%x, CPU1=0x%x, CPU2=0x%x\n", + nes_read_indexed(nesdev, NES_IDX_QP_CONTROL + PCI_FUNC(nesdev->pcidev->devfn) * 8), + nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS), + nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 4), + nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 8)); + + nes_debug(NES_DBG_INIT, "Reset and init NE020\n"); + + + if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0) + return NULL; + if (nes_init_serdes(nesdev, hw_rev, port_count, OneG_Mode)) + return NULL; + nes_init_csr_ne020(nesdev, hw_rev, port_count); + + max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE); + nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp); + + u32temp = nes_read_indexed(nesdev, NES_IDX_QUAD_HASH_TABLE_SIZE); + if (max_qp > ((u32)1 << (u32temp & 0x001f))) { + nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to hash table size = 0x%08X\n", + max_qp, u32temp); + max_qp = (u32)1 << (u32temp & 0x001f); + } + + hte_index_mask = ((u32)1 << ((u32temp & 0x001f)+1))-1; + nes_debug(NES_DBG_INIT, "Max QP = %u, hte_index_mask = 0x%08X.\n", + max_qp, hte_index_mask); + + u32temp = nes_read_indexed(nesdev, NES_IDX_IRRQ_COUNT); + + max_irrq = 1 << (u32temp & 0x001f); + + if (max_qp > max_irrq) { + max_qp = max_irrq; + nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to Available Q1s.\n", + max_qp); + } + + /* there should be no reason to allocate more pds than qps */ + if (num_pds > max_qp) + num_pds = max_qp; + + u32temp = nes_read_indexed(nesdev, NES_IDX_MRT_SIZE); + max_mr = (u32)8192 << (u32temp & 0x7); + + u32temp = nes_read_indexed(nesdev, NES_IDX_PBL_REGION_SIZE); + max_256pbl = (u32)1 << (u32temp & 0x0000001f); + max_4kpbl = (u32)1 << ((u32temp >> 16) & 0x0000001f); + max_cq = nes_read_indexed(nesdev, NES_IDX_CQ_CTX_SIZE); + + u32temp = nes_read_indexed(nesdev, NES_IDX_ARP_CACHE_SIZE); + arp_table_size = 1 << u32temp; + + adapter_size = (sizeof(struct nes_adapter) + + (sizeof(unsigned long)-1)) & (~(sizeof(unsigned long)-1)); + adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp); + adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr); + adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq); + adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds); + adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size); + adapter_size += sizeof(struct nes_qp **) * max_qp; + + /* allocate a new adapter struct */ + nesadapter = kzalloc(adapter_size, GFP_KERNEL); + if (nesadapter == NULL) { + return NULL; + } + + nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n", + nesadapter, (u32)sizeof(struct nes_adapter), adapter_size); + + /* populate the new nesadapter */ + nesadapter->devfn = nesdev->pcidev->devfn; + nesadapter->bus_number = nesdev->pcidev->bus->number; + nesadapter->ref_count = 1; + nesadapter->timer_int_req = 0xffff0000; + nesadapter->OneG_Mode = OneG_Mode; + nesadapter->doorbell_start = nesdev->doorbell_region; + + /* nesadapter->tick_delta = clk_divisor; */ + nesadapter->hw_rev = hw_rev; + nesadapter->port_count = port_count; + + nesadapter->max_qp = max_qp; + nesadapter->hte_index_mask = hte_index_mask; + nesadapter->max_irrq = max_irrq; + nesadapter->max_mr = max_mr; + nesadapter->max_256pbl = max_256pbl - 1; + nesadapter->max_4kpbl = max_4kpbl - 1; + nesadapter->max_cq = max_cq; + nesadapter->free_256pbl = max_256pbl - 1; + nesadapter->free_4kpbl = max_4kpbl - 1; + nesadapter->max_pd = num_pds; + nesadapter->arp_table_size = arp_table_size; + + nesadapter->et_pkt_rate_low = NES_TIMER_ENABLE_LIMIT; + if (nes_drv_opt & NES_DRV_OPT_DISABLE_INT_MOD) { + nesadapter->et_use_adaptive_rx_coalesce = 0; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; + nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; + } else { + nesadapter->et_use_adaptive_rx_coalesce = 1; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; + nesadapter->et_rx_coalesce_usecs_irq = 0; + printk(PFX "%s: Using Adaptive Interrupt Moderation\n", __FUNCTION__); + } + /* Setup and enable the periodic timer */ + if (nesadapter->et_rx_coalesce_usecs_irq) + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 | + ((u32)(nesadapter->et_rx_coalesce_usecs_irq * 8))); + else + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x00000000); + + nesadapter->base_pd = 1; + + nesadapter->device_cap_flags = + IB_DEVICE_ZERO_STAG | IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW; + + nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter) + [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]); + nesadapter->allocated_cqs = &nesadapter->allocated_qps[BITS_TO_LONGS(max_qp)]; + nesadapter->allocated_mrs = &nesadapter->allocated_cqs[BITS_TO_LONGS(max_cq)]; + nesadapter->allocated_pds = &nesadapter->allocated_mrs[BITS_TO_LONGS(max_mr)]; + nesadapter->allocated_arps = &nesadapter->allocated_pds[BITS_TO_LONGS(num_pds)]; + nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]); + + + /* mark the usual suspect QPs and CQs as in use */ + for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) { + set_bit(u32temp, nesadapter->allocated_qps); + set_bit(u32temp, nesadapter->allocated_cqs); + } + + for (u32temp = 0; u32temp < 20; u32temp++) + set_bit(u32temp, nesadapter->allocated_pds); + u32temp = nes_read_indexed(nesdev, NES_IDX_QP_MAX_CFG_SIZES); + + max_rq_wrs = ((u32temp >> 8) & 3); + switch (max_rq_wrs) { + case 0: + max_rq_wrs = 4; + break; + case 1: + max_rq_wrs = 16; + break; + case 2: + max_rq_wrs = 32; + break; + case 3: + max_rq_wrs = 512; + break; + } + + max_sq_wrs = (u32temp & 3); + switch (max_sq_wrs) { + case 0: + max_sq_wrs = 4; + break; + case 1: + max_sq_wrs = 16; + break; + case 2: + max_sq_wrs = 32; + break; + case 3: + max_sq_wrs = 512; + break; + } + nesadapter->max_qp_wr = min(max_rq_wrs, max_sq_wrs); + nesadapter->max_irrq_wr = (u32temp >> 16) & 3; + + nesadapter->max_sge = 4; + nesadapter->max_cqe = 32767; + + if (nes_read_eeprom_values(nesdev, nesadapter)) { + printk(KERN_ERR PFX "Unable to read EEPROM data.\n"); + kfree(nesadapter); + return NULL; + } + + u32temp = nes_read_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG); + nes_write_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG, + (u32temp & 0xff000000) | (nesadapter->tcp_timer_core_clk_divisor & 0x00ffffff)); + + /* setup port configuration */ + if (nesadapter->port_count == 1) { + u32temp = 0x00000000; + if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) + nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002); + else + nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); + } else { + if (nesadapter->port_count == 2) + u32temp = 0x00000044; + else + u32temp = 0x000000e4; + nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); + } + + nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, u32temp); + nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n", + nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT)); + + spin_lock_init(&nesadapter->resource_lock); + spin_lock_init(&nesadapter->phy_lock); + spin_lock_init(&nesadapter->pbl_lock); + spin_lock_init(&nesadapter->periodic_timer_lock); + + INIT_LIST_HEAD(&nesadapter->nesvnic_list[0]); + INIT_LIST_HEAD(&nesadapter->nesvnic_list[1]); + INIT_LIST_HEAD(&nesadapter->nesvnic_list[2]); + INIT_LIST_HEAD(&nesadapter->nesvnic_list[3]); + + if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) { + u32 pcs_control_status0, pcs_control_status1; + u32 reset_value; + u32 i = 0; + u32 int_cnt = 0; + u32 ext_cnt = 0; + unsigned long flags; + u32 j = 0; + + pcs_control_status0 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0); + pcs_control_status1 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + + for (i = 0; i < NES_MAX_LINK_CHECK; i++) { + pcs_control_status0 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0); + pcs_control_status1 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + if ((0x0F000100 == (pcs_control_status0 & 0x0F000100)) + || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) + int_cnt++; + msleep(1); + } + if (int_cnt > 1) { + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088); + mh_detected++; + reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); + reset_value |= 0x0000003d; + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); + + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) + & 0x00000040) != 0x00000040) && (j++ < 5000)); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + + pcs_control_status0 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0); + pcs_control_status1 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + + for (i = 0; i < NES_MAX_LINK_CHECK; i++) { + pcs_control_status0 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0); + pcs_control_status1 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + if ((0x0F000100 == (pcs_control_status0 & 0x0F000100)) + || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) { + if (++ext_cnt > int_cnt) { + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, + 0x0000F0C8); + mh_detected++; + reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); + reset_value |= 0x0000003d; + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); + + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) + & 0x00000040) != 0x00000040) && (j++ < 5000)); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + break; + } + } + msleep(1); + } + } + } + + if (nesadapter->hw_rev == NE020_REV) { + init_timer(&nesadapter->mh_timer); + nesadapter->mh_timer.function = nes_mh_fix; + nesadapter->mh_timer.expires = jiffies + (HZ/5); /* 1 second */ + nesadapter->mh_timer.data = (unsigned long)nesdev; + add_timer(&nesadapter->mh_timer); + } else { + nes_write32(nesdev->regs+NES_INTF_INT_STAT, 0x0f000000); + } + + init_timer(&nesadapter->lc_timer); + nesadapter->lc_timer.function = nes_clc; + nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */ + nesadapter->lc_timer.data = (unsigned long)nesdev; + add_timer(&nesadapter->lc_timer); + + list_add_tail(&nesadapter->list, &nes_adapter_list); + + for (func_index = 0; func_index < 8; func_index++) { + pci_bus_read_config_word(nesdev->pcidev->bus, + PCI_DEVFN(PCI_SLOT(nesdev->pcidev->devfn), + func_index), 0, &vendor_id); + if (vendor_id == 0xffff) + break; + } + nes_debug(NES_DBG_INIT, "%s %d functions found for %s.\n", __FUNCTION__, + func_index, pci_name(nesdev->pcidev)); + nesadapter->adapter_fcn_count = func_index; + + return nesadapter; +} + + +/** + * nes_reset_adapter_ne020 + */ +unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode) +{ + u32 port_count; + u32 u32temp; + u32 i; + + u32temp = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); + port_count = ((u32temp & 0x00000300) >> 8) + 1; + /* TODO: assuming that both SERDES are set the same for now */ + *OneG_Mode = (u32temp & 0x00003c00) ? 0 : 1; + nes_debug(NES_DBG_INIT, "Initial Software Reset = 0x%08X, port_count=%u\n", + u32temp, port_count); + if (*OneG_Mode) + nes_debug(NES_DBG_INIT, "Running in 1G mode.\n"); + u32temp &= 0xff00ffc0; + switch (port_count) { + case 1: + u32temp |= 0x00ee0000; + break; + case 2: + u32temp |= 0x00cc0000; + break; + case 4: + u32temp |= 0x00000000; + break; + default: + return 0; + break; + } + + /* check and do full reset if needed */ + if (nes_read_indexed(nesdev, NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))) { + nes_debug(NES_DBG_INIT, "Issuing Full Soft reset = 0x%08X\n", u32temp | 0xd); + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd); + + i = 0; + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) + mdelay(1); + if (i >= 10000) { + nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n"); + return 0; + } + } + + /* port reset */ + switch (port_count) { + case 1: + u32temp |= 0x00ee0010; + break; + case 2: + u32temp |= 0x00cc0030; + break; + case 4: + u32temp |= 0x00000030; + break; + } + + nes_debug(NES_DBG_INIT, "Issuing Port Soft reset = 0x%08X\n", u32temp | 0xd); + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd); + + i = 0; + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) + mdelay(1); + if (i >= 10000) { + nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n"); + return 0; + } + + /* serdes 0 */ + i = 0; + while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) + & 0x0000000f)) != 0x0000000f) && i++ < 5000) + mdelay(1); + if (i >= 5000) { + nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp); + return 0; + } + + /* serdes 1 */ + if (port_count > 1) { + i = 0; + while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) + & 0x0000000f)) != 0x0000000f) && i++ < 5000) + mdelay(1); + if (i >= 5000) { + nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp); + return 0; + } + } + + + + i = 0; + while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000) + mdelay(1); + if (i >= 10000) { + printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n", + nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS)); + return 0; + } + + return port_count; +} + + +/** + * nes_init_serdes + */ +int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, u8 OneG_Mode) +{ + int i; + u32 u32temp; + + if (hw_rev != NE020_REV) { + /* init serdes 0 */ + + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); + if (!OneG_Mode) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000); + if (port_count > 1) { + /* init serdes 1 */ + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF); + if (!OneG_Mode) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000); + } + } else { + /* init serdes 0 */ + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008); + i = 0; + while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) + & 0x0000000f)) != 0x0000000f) && i++ < 5000) + mdelay(1); + if (i >= 5000) { + nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp); + return 1; + } + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000); + if (OneG_Mode) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222); + else + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222); + + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff); + if (port_count > 1) { + /* init serdes 1 */ + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x00000048); + i = 0; + while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) + & 0x0000000f)) != 0x0000000f) && (i++ < 5000)) + mdelay(1); + if (i >= 5000) { + printk("%s: Init: serdes 1 not ready, status=%x\n", __FUNCTION__, u32temp); + /* return 1; */ + } + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x000bdef7); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE1, 0x9ce73000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE1, 0x0ff00000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET1, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS1, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL1, 0xf0002222); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000ff); + } + } + return 0; +} + + +/** + * nes_init_csr_ne020 + * Initialize registers for ne020 hardware + */ +void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count) +{ + u32 u32temp; + + nes_debug(NES_DBG_INIT, "port_count=%d\n", port_count); + + nes_write_indexed(nesdev, 0x000001E4, 0x00000007); + /* nes_write_indexed(nesdev, 0x000001E8, 0x000208C4); */ + nes_write_indexed(nesdev, 0x000001E8, 0x00020874); + nes_write_indexed(nesdev, 0x000001D8, 0x00048002); + /* nes_write_indexed(nesdev, 0x000001D8, 0x0004B002); */ + nes_write_indexed(nesdev, 0x000001FC, 0x00050005); + nes_write_indexed(nesdev, 0x00000600, 0x55555555); + nes_write_indexed(nesdev, 0x00000604, 0x55555555); + + /* TODO: move these MAC register settings to NIC bringup */ + nes_write_indexed(nesdev, 0x00002000, 0x00000001); + nes_write_indexed(nesdev, 0x00002004, 0x00000001); + nes_write_indexed(nesdev, 0x00002008, 0x0000FFFF); + nes_write_indexed(nesdev, 0x0000200C, 0x00000001); + nes_write_indexed(nesdev, 0x00002010, 0x000003c1); + nes_write_indexed(nesdev, 0x0000201C, 0x75345678); + if (port_count > 1) { + nes_write_indexed(nesdev, 0x00002200, 0x00000001); + nes_write_indexed(nesdev, 0x00002204, 0x00000001); + nes_write_indexed(nesdev, 0x00002208, 0x0000FFFF); + nes_write_indexed(nesdev, 0x0000220C, 0x00000001); + nes_write_indexed(nesdev, 0x00002210, 0x000003c1); + nes_write_indexed(nesdev, 0x0000221C, 0x75345678); + nes_write_indexed(nesdev, 0x00000908, 0x20000001); + } + if (port_count > 2) { + nes_write_indexed(nesdev, 0x00002400, 0x00000001); + nes_write_indexed(nesdev, 0x00002404, 0x00000001); + nes_write_indexed(nesdev, 0x00002408, 0x0000FFFF); + nes_write_indexed(nesdev, 0x0000240C, 0x00000001); + nes_write_indexed(nesdev, 0x00002410, 0x000003c1); + nes_write_indexed(nesdev, 0x0000241C, 0x75345678); + nes_write_indexed(nesdev, 0x00000910, 0x20000001); + + nes_write_indexed(nesdev, 0x00002600, 0x00000001); + nes_write_indexed(nesdev, 0x00002604, 0x00000001); + nes_write_indexed(nesdev, 0x00002608, 0x0000FFFF); + nes_write_indexed(nesdev, 0x0000260C, 0x00000001); + nes_write_indexed(nesdev, 0x00002610, 0x000003c1); + nes_write_indexed(nesdev, 0x0000261C, 0x75345678); + nes_write_indexed(nesdev, 0x00000918, 0x20000001); + } + + nes_write_indexed(nesdev, 0x00005000, 0x00018000); + /* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */ + nes_write_indexed(nesdev, 0x00005004, 0x00020001); + nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F); + nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F); + nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F); + nes_write_indexed(nesdev, 0x00005020, 0x1F1F1F1F); + nes_write_indexed(nesdev, 0x00006090, 0xFFFFFFFF); + + /* TODO: move this to code, get from EEPROM */ + nes_write_indexed(nesdev, 0x00000900, 0x20000001); + nes_write_indexed(nesdev, 0x000060C0, 0x0000028e); + nes_write_indexed(nesdev, 0x000060C8, 0x00000020); + // + nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0); + /* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */ + + if (hw_rev != NE020_REV) { + u32temp = nes_read_indexed(nesdev, 0x000008e8); + u32temp |= 0x80000000; + nes_write_indexed(nesdev, 0x000008e8, u32temp); + u32temp = nes_read_indexed(nesdev, 0x000021f8); + u32temp &= 0x7fffffff; + u32temp |= 0x7fff0010; + nes_write_indexed(nesdev, 0x000021f8, u32temp); + } +} + + +/** + * nes_destroy_adapter - destroy the adapter structure + */ +void nes_destroy_adapter(struct nes_adapter *nesadapter) +{ + struct nes_adapter *tmp_adapter; + + list_for_each_entry(tmp_adapter, &nes_adapter_list, list) { + nes_debug(NES_DBG_SHUTDOWN, "Nes Adapter list entry = 0x%p.\n", + tmp_adapter); + } + + nesadapter->ref_count--; + if (!nesadapter->ref_count) { + if (nesadapter->hw_rev == NE020_REV) { + del_timer(&nesadapter->mh_timer); + } + del_timer(&nesadapter->lc_timer); + + list_del(&nesadapter->list); + kfree(nesadapter); + } +} + + +/** + * nes_init_cqp + */ +int nes_init_cqp(struct nes_device *nesdev) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_cqp_qp_context *cqp_qp_context; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_hw_ceq *ceq; + struct nes_hw_ceq *nic_ceq; + struct nes_hw_aeq *aeq; + void *vmem; + dma_addr_t pmem; + u32 count=0; + u32 cqp_head; + u64 u64temp; + u32 u32temp; + + /* allocate CQP memory */ + /* Need to add max_cq to the aeq size once cq overflow checking is added back */ + /* SQ is 512 byte aligned, others are 256 byte aligned */ + nesdev->cqp_mem_size = 512 + + (sizeof(struct nes_hw_cqp_wqe) * NES_CQP_SQ_SIZE) + + (sizeof(struct nes_hw_cqe) * NES_CCQ_SIZE) + + max(((u32)sizeof(struct nes_hw_ceqe) * NES_CCEQ_SIZE), (u32)256) + + max(((u32)sizeof(struct nes_hw_ceqe) * NES_NIC_CEQ_SIZE), (u32)256) + + (sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) + + sizeof(struct nes_hw_cqp_qp_context); + + nesdev->cqp_vbase = pci_alloc_consistent(nesdev->pcidev, nesdev->cqp_mem_size, + &nesdev->cqp_pbase); + if (!nesdev->cqp_vbase) { + nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n"); + return -ENOMEM; + } + memset(nesdev->cqp_vbase, 0, nesdev->cqp_mem_size); + + /* Allocate a twice the number of CQP requests as the SQ size */ + nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) * + 2 * NES_CQP_SQ_SIZE, GFP_KERNEL); + if (nesdev->nes_cqp_requests == NULL) { + nes_debug(NES_DBG_INIT, "Unable to allocate memory CQP request entries.\n"); + pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase, + nesdev->cqp.sq_pbase); + return -ENOMEM; + } + + nes_debug(NES_DBG_INIT, "Allocated CQP structures at %p (phys = %016lX), size = %u.\n", + nesdev->cqp_vbase, (unsigned long)nesdev->cqp_pbase, nesdev->cqp_mem_size); + + spin_lock_init(&nesdev->cqp.lock); + init_waitqueue_head(&nesdev->cqp.waitq); + + /* Setup Various Structures */ + vmem = (void *)(((unsigned long)nesdev->cqp_vbase + (512 - 1)) & + ~(unsigned long)(512 - 1)); + pmem = (dma_addr_t)(((unsigned long long)nesdev->cqp_pbase + (512 - 1)) & + ~(unsigned long long)(512 - 1)); + + nesdev->cqp.sq_vbase = vmem; + nesdev->cqp.sq_pbase = pmem; + nesdev->cqp.sq_size = NES_CQP_SQ_SIZE; + nesdev->cqp.sq_head = 0; + nesdev->cqp.sq_tail = 0; + nesdev->cqp.qp_id = PCI_FUNC(nesdev->pcidev->devfn); + + vmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size); + pmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size); + + nesdev->ccq.cq_vbase = vmem; + nesdev->ccq.cq_pbase = pmem; + nesdev->ccq.cq_size = NES_CCQ_SIZE; + nesdev->ccq.cq_head = 0; + nesdev->ccq.ce_handler = nes_cqp_ce_handler; + nesdev->ccq.cq_number = PCI_FUNC(nesdev->pcidev->devfn); + + vmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size); + pmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size); + + nesdev->ceq_index = PCI_FUNC(nesdev->pcidev->devfn); + ceq = &nesadapter->ceq[nesdev->ceq_index]; + ceq->ceq_vbase = vmem; + ceq->ceq_pbase = pmem; + ceq->ceq_size = NES_CCEQ_SIZE; + ceq->ceq_head = 0; + + vmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256); + pmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256); + + nesdev->nic_ceq_index = PCI_FUNC(nesdev->pcidev->devfn) + 8; + nic_ceq = &nesadapter->ceq[nesdev->nic_ceq_index]; + nic_ceq->ceq_vbase = vmem; + nic_ceq->ceq_pbase = pmem; + nic_ceq->ceq_size = NES_NIC_CEQ_SIZE; + nic_ceq->ceq_head = 0; + + vmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256); + pmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256); + + aeq = &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)]; + aeq->aeq_vbase = vmem; + aeq->aeq_pbase = pmem; + aeq->aeq_size = nesadapter->max_qp; + aeq->aeq_head = 0; + + /* Setup QP Context */ + vmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size); + pmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size); + + cqp_qp_context = vmem; + cqp_qp_context->context_words[0] = + cpu_to_le32((PCI_FUNC(nesdev->pcidev->devfn) << 12) + (2 << 10)); + cqp_qp_context->context_words[1] = 0; + cqp_qp_context->context_words[2] = cpu_to_le32((u32)nesdev->cqp.sq_pbase); + cqp_qp_context->context_words[3] = cpu_to_le32(((u64)nesdev->cqp.sq_pbase) >> 32); + + + /* Write the address to Create CQP */ + if ((sizeof(dma_addr_t) > 4)) { + nes_write_indexed(nesdev, + NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8), + ((u64)pmem) >> 32); + } else { + nes_write_indexed(nesdev, + NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8), 0); + } + nes_write_indexed(nesdev, + NES_IDX_CREATE_CQP_LOW + (PCI_FUNC(nesdev->pcidev->devfn) * 8), + (u32)pmem); + + INIT_LIST_HEAD(&nesdev->cqp_avail_reqs); + INIT_LIST_HEAD(&nesdev->cqp_pending_reqs); + + for (count = 0; count < 2*NES_CQP_SQ_SIZE; count++) { + init_waitqueue_head(&nesdev->nes_cqp_requests[count].waitq); + list_add_tail(&nesdev->nes_cqp_requests[count].list, &nesdev->cqp_avail_reqs); + } + + /* Write Create CCQ WQE */ + cqp_head = nesdev->cqp.sq_head++; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | + NES_CQP_CQ_CHK_OVERFLOW | ((u32)nesdev->ccq.cq_size << 16))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + (nesdev->ccq.cq_number | + ((u32)nesdev->ceq_index << 16))); + u64temp = (u64)nesdev->ccq.cq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; + u64temp = (unsigned long)&nesdev->ccq; + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = + cpu_to_le32((u32)(u64temp >> 1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; + + /* Write Create CEQ WQE */ + cqp_head = nesdev->cqp.sq_head++; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_CREATE_CEQ + ((u32)nesdev->ceq_index << 8))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, ceq->ceq_size); + u64temp = (u64)ceq->ceq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + + /* Write Create AEQ WQE */ + cqp_head = nesdev->cqp.sq_head++; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_CREATE_AEQ + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX, aeq->aeq_size); + u64temp = (u64)aeq->aeq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + + /* Write Create NIC CEQ WQE */ + cqp_head = nesdev->cqp.sq_head++; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_CREATE_CEQ + ((u32)nesdev->nic_ceq_index << 8))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, nic_ceq->ceq_size); + u64temp = (u64)nic_ceq->ceq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + + /* Poll until CCQP done */ + count = 0; + do { + if (count++ > 1000) { + printk(KERN_ERR PFX "Error creating CQP\n"); + pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, + nesdev->cqp_vbase, nesdev->cqp_pbase); + return -1; + } + udelay(10); + } while (!(nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn) * 8)) & (1 << 8))); + + nes_debug(NES_DBG_INIT, "CQP Status = 0x%08X\n", nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))); + + u32temp = 0x04800000; + nes_write32(nesdev->regs+NES_WQE_ALLOC, u32temp | nesdev->cqp.qp_id); + + /* wait for the CCQ, CEQ, and AEQ to get created */ + count = 0; + do { + if (count++ > 1000) { + printk(KERN_ERR PFX "Error creating CCQ, CEQ, and AEQ\n"); + pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, + nesdev->cqp_vbase, nesdev->cqp_pbase); + return -1; + } + udelay(10); + } while (((nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != (15<<8))); + + /* dump the QP status value */ + nes_debug(NES_DBG_INIT, "QP Status = 0x%08X\n", nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))); + + nesdev->cqp.sq_tail++; + + return 0; +} + + +/** + * nes_destroy_cqp + */ +int nes_destroy_cqp(struct nes_device *nesdev) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + u32 count = 0; + u32 cqp_head; + unsigned long flags; + + do { + if (count++ > 1000) + break; + udelay(10); + } while (!(nesdev->cqp.sq_head == nesdev->cqp.sq_tail)); + + /* Reset CCQ */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_RESET | + nesdev->ccq.cq_number); + + /* Disable device interrupts */ + nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff); + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + + /* Destroy the AEQ */ + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_AEQ | + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0; + + /* Destroy the NIC CEQ */ + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ | + ((u32)nesdev->nic_ceq_index << 8)); + + /* Destroy the CEQ */ + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ | + (nesdev->ceq_index << 8)); + + /* Destroy the CCQ */ + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CQ); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->ccq.cq_number | + ((u32)nesdev->ceq_index << 16)); + + /* Destroy CQP */ + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_QP | + NES_CQP_QP_TYPE_CQP); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->cqp.qp_id); + + barrier(); + /* Ring doorbell (5 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + + /* wait for the CCQ, CEQ, and AEQ to get destroyed */ + count = 0; + do { + if (count++ > 1000) { + printk(KERN_ERR PFX "Function%d: Error destroying CCQ, CEQ, and AEQ\n", + PCI_FUNC(nesdev->pcidev->devfn)); + break; + } + udelay(10); + } while (((nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15 << 8)) != 0)); + + /* dump the QP status value */ + nes_debug(NES_DBG_SHUTDOWN, "Function%d: QP Status = 0x%08X\n", + PCI_FUNC(nesdev->pcidev->devfn), + nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))); + + kfree(nesdev->nes_cqp_requests); + + /* Free the control structures */ + pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase, + nesdev->cqp.sq_pbase); + + return 0; +} + + +/** + * nes_init_phy + */ +int nes_init_phy(struct nes_device *nesdev) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 counter = 0; + u32 mac_index = nesdev->mac_index; + u32 tx_config; + u16 phy_data; + + if (nesadapter->OneG_Mode) { + nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index); + if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) { + printk(PFX "%s: Programming mdc config for 1G\n", __FUNCTION__); + tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + tx_config |= 0x04; + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + } + + nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n", + nesadapter->phy_index[mac_index], phy_data); + nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000); + + /* Reset the PHY */ + nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000); + udelay(100); + counter = 0; + do { + nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data); + if (counter++ > 100) break; + } while (phy_data & 0x8000); + + /* Setting no phy loopback */ + phy_data &= 0xbfff; + phy_data |= 0x1140; + nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data); + nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data); + + nes_read_1G_phy_reg(nesdev, 0x17, nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy data from register 0x17 = 0x%X.\n", phy_data); + + nes_read_1G_phy_reg(nesdev, 0x1e, nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy data from register 0x1e = 0x%X.\n", phy_data); + + /* Setting the interrupt mask */ + nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data); + nes_write_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], 0xffee); + + nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data); + + /* turning on flow control */ + nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data); + nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], + (phy_data & ~(0x03E0)) | 0xc00); + /* nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], + phy_data | 0xc00); */ + nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data); + + nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data); + /* Clear Half duplex */ + nes_write_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], + phy_data & ~(0x0100)); + nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data); + + nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); + nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300); + } else { + if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) { + /* setup 10G MDIO operation */ + tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + tx_config |= 0x14; + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + } + } + return 0; +} + + +/** + * nes_replenish_nic_rq + */ +static void nes_replenish_nic_rq(struct nes_vnic *nesvnic) +{ + unsigned long flags; + dma_addr_t bus_address; + struct sk_buff *skb; + struct nes_hw_nic_rq_wqe *nic_rqe; + struct nes_hw_nic *nesnic; + struct nes_device *nesdev; + u32 rx_wqes_posted = 0; + + nesnic = &nesvnic->nic; + nesdev = nesvnic->nesdev; + spin_lock_irqsave(&nesnic->rq_lock, flags); + if (nesnic->replenishing_rq !=0) { + if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) && + (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) { + atomic_set(&nesvnic->rx_skb_timer_running, 1); + spin_unlock_irqrestore(&nesnic->rq_lock, flags); + nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */ + add_timer(&nesvnic->rq_wqes_timer); + } else + spin_unlock_irqrestore(&nesnic->rq_lock, flags); + return; + } + nesnic->replenishing_rq = 1; + spin_unlock_irqrestore(&nesnic->rq_lock, flags); + do { + skb = dev_alloc_skb(nesvnic->max_frame_size); + if (skb) { + skb->dev = nesvnic->netdev; + + bus_address = pci_map_single(nesdev->pcidev, + skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + + nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head]; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = + cpu_to_le32(nesvnic->max_frame_size); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)bus_address); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = + cpu_to_le32((u32)((u64)bus_address >> 32)); + nesnic->rx_skb[nesnic->rq_head] = skb; + nesnic->rq_head++; + nesnic->rq_head &= nesnic->rq_size - 1; + atomic_dec(&nesvnic->rx_skbs_needed); + barrier(); + if (++rx_wqes_posted == 255) { + nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id); + rx_wqes_posted = 0; + } + } else { + spin_lock_irqsave(&nesnic->rq_lock, flags); + if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) && + (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) { + atomic_set(&nesvnic->rx_skb_timer_running, 1); + spin_unlock_irqrestore(&nesnic->rq_lock, flags); + nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */ + add_timer(&nesvnic->rq_wqes_timer); + } else + spin_unlock_irqrestore(&nesnic->rq_lock, flags); + break; + } + } while (atomic_read(&nesvnic->rx_skbs_needed)); + barrier(); + if (rx_wqes_posted) + nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id); + nesnic->replenishing_rq = 0; +} + + +/** + * nes_rq_wqes_timeout + */ +static void nes_rq_wqes_timeout(unsigned long parm) +{ + struct nes_vnic *nesvnic = (struct nes_vnic *)parm; + printk("%s: Timer fired.\n", __FUNCTION__); + atomic_set(&nesvnic->rx_skb_timer_running, 0); + if (atomic_read(&nesvnic->rx_skbs_needed)) + nes_replenish_nic_rq(nesvnic); +} + + +/** + * nes_init_nic_qp + */ +int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_hw_nic_sq_wqe *nic_sqe; + struct nes_hw_nic_qp_context *nic_context; + struct sk_buff *skb; + struct nes_hw_nic_rq_wqe *nic_rqe; + struct nes_vnic *nesvnic = netdev_priv(netdev); + unsigned long flags; + void *vmem; + dma_addr_t pmem; + u64 u64temp; + int ret; + u32 cqp_head; + u32 counter; + u32 wqe_count; + u8 jumbomode=0; + + /* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */ + nesvnic->nic_mem_size = 256 + + (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag)) + + (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)) + + (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)) + + (NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) + + sizeof(struct nes_hw_nic_qp_context); + + nesvnic->nic_vbase = pci_alloc_consistent(nesdev->pcidev, nesvnic->nic_mem_size, + &nesvnic->nic_pbase); + if (!nesvnic->nic_vbase) { + nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n"); + return -ENOMEM; + } + memset(nesvnic->nic_vbase, 0, nesvnic->nic_mem_size); + nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n", + nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size); + + vmem = (void *)(((unsigned long)nesvnic->nic_vbase + (256 - 1)) & + ~(unsigned long)(256 - 1)); + pmem = (dma_addr_t)(((unsigned long long)nesvnic->nic_pbase + (256 - 1)) & + ~(unsigned long long)(256 - 1)); + + /* Setup the first Fragment buffers */ + nesvnic->nic.first_frag_vbase = vmem; + + for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) { + nesvnic->nic.frag_paddr[counter] = pmem; + pmem += sizeof(struct nes_first_frag); + } + + /* setup the SQ */ + vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag)); + + nesvnic->nic.sq_vbase = (void *)vmem; + nesvnic->nic.sq_pbase = pmem; + nesvnic->nic.sq_head = 0; + nesvnic->nic.sq_tail = 0; + nesvnic->nic.sq_size = NES_NIC_WQ_SIZE; + for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) { + nic_sqe = &nesvnic->nic.sq_vbase[counter]; + nic_sqe->wqe_words[NES_NIC_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_NIC_SQ_WQE_DISABLE_CHKSUM | + NES_NIC_SQ_WQE_COMPLETION); + nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX] = + cpu_to_le32((u32)NES_FIRST_FRAG_SIZE << 16); + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)nesvnic->nic.frag_paddr[counter]); + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX] = + cpu_to_le32((u32)((u64)nesvnic->nic.frag_paddr[counter] >> 32)); + } + + nesvnic->get_cqp_request = nes_get_cqp_request; + nesvnic->post_cqp_request = nes_post_cqp_request; + nesvnic->mcrq_mcast_filter = NULL; + + spin_lock_init(&nesvnic->nic.sq_lock); + spin_lock_init(&nesvnic->nic.rq_lock); + + /* setup the RQ */ + vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)); + pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)); + + + nesvnic->nic.rq_vbase = vmem; + nesvnic->nic.rq_pbase = pmem; + nesvnic->nic.rq_head = 0; + nesvnic->nic.rq_tail = 0; + nesvnic->nic.rq_size = NES_NIC_WQ_SIZE; + + /* setup the CQ */ + vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)); + pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)); + + if (nesdev->nesadapter->netdev_count > 2) + nesvnic->mcrq_qp_id = nesvnic->nic_index + 32; + else + nesvnic->mcrq_qp_id = nesvnic->nic.qp_id + 4; + + nesvnic->nic_cq.cq_vbase = vmem; + nesvnic->nic_cq.cq_pbase = pmem; + nesvnic->nic_cq.cq_head = 0; + nesvnic->nic_cq.cq_size = NES_NIC_WQ_SIZE * 2; + + nesvnic->nic_cq.ce_handler = nes_nic_napi_ce_handler; + + /* Send CreateCQ request to CQP */ + spin_lock_irqsave(&nesdev->cqp.lock, flags); + cqp_head = nesdev->cqp.sq_head; + + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( + NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | + ((u32)nesvnic->nic_cq.cq_size << 16)); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32( + nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16)); + u64temp = (u64)nesvnic->nic_cq.cq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; + u64temp = (unsigned long)&nesvnic->nic_cq; + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + /* Send CreateQP request to CQP */ + nic_context = (void *)(&nesvnic->nic_cq.cq_vbase[nesvnic->nic_cq.cq_size]); + nic_context->context_words[NES_NIC_CTX_MISC_IDX] = + cpu_to_le32((u32)NES_NIC_CTX_SIZE | + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12)); + nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n", + nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE), + nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE)); + if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) { + nic_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE); + } + + u64temp = (u64)nesvnic->nic.sq_pbase; + nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); + u64temp = (u64)nesvnic->nic.rq_pbase; + nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP | + NES_CQP_QP_TYPE_NIC); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesvnic->nic.qp_id); + u64temp = (u64)nesvnic->nic_cq.cq_pbase + + (nesvnic->nic_cq.cq_size * sizeof(struct nes_hw_nic_cqe)); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + nesdev->cqp.sq_head = cqp_head; + + barrier(); + + /* Ring doorbell (2 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + nes_debug(NES_DBG_INIT, "Waiting for create NIC QP%u to complete.\n", + nesvnic->nic.qp_id); + + ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_INIT, "Create NIC QP%u completed, wait_event_timeout ret = %u.\n", + nesvnic->nic.qp_id, ret); + if (!ret) { + nes_debug(NES_DBG_INIT, "NIC QP%u create timeout expired\n", nesvnic->nic.qp_id); + pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase, + nesvnic->nic_pbase); + return -EIO; + } + + /* Populate the RQ */ + for (counter = 0; counter < (NES_NIC_WQ_SIZE - 1); counter++) { + skb = dev_alloc_skb(nesvnic->max_frame_size); + if (!skb) { + nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name); + + nes_destroy_nic_qp(nesvnic); + return -ENOMEM; + } + + skb->dev = netdev; + + pmem = pci_map_single(nesdev->pcidev, skb->data, + nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + + nic_rqe = &nesvnic->nic.rq_vbase[counter]; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32)); + nesvnic->nic.rx_skb[counter] = skb; + } + + wqe_count = NES_NIC_WQ_SIZE - 1; + nesvnic->nic.rq_head = wqe_count; + barrier(); + do { + counter = min(wqe_count, ((u32)255)); + wqe_count -= counter; + nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id); + } while (wqe_count); + init_timer(&nesvnic->rq_wqes_timer); + nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout; + nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic; + nes_debug(NES_DBG_INIT, "NAPI support Enabled\n"); + + if (nesdev->nesadapter->et_use_adaptive_rx_coalesce) + { + nes_nic_init_timer(nesdev); + if (netdev->mtu > 1500) + jumbomode = 1; + nes_nic_init_timer_defaults(nesdev, jumbomode); + } + + return 0; +} + + +/** + * nes_destroy_nic_qp + */ +void nes_destroy_nic_qp(struct nes_vnic *nesvnic) +{ + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_hw_nic_rq_wqe *nic_rqe; + u64 wqe_frag; + u32 cqp_head; + unsigned long flags; + int ret; + + /* Free remaining NIC receive buffers */ + while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) { + nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail]; + wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); + wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; + pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag, + nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]); + nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1); + } + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + + /* Destroy NIC QP */ + cqp_head = nesdev->cqp.sq_head; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + nesvnic->nic.qp_id); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + + /* Destroy NIC CQ */ + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_DESTROY_CQ | ((u32)nesvnic->nic_cq.cq_size << 16))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + (nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16))); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + + nesdev->cqp.sq_head = cqp_head; + barrier(); + + /* Ring doorbell (2 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u," + " cqp.sq_tail=%u, cqp.sq_size=%u\n", + cqp_head, nesdev->cqp.sq_head, + nesdev->cqp.sq_tail, nesdev->cqp.sq_size); + + ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), + NES_EVENT_TIMEOUT); + + nes_debug(NES_DBG_SHUTDOWN, "Destroy NIC QP returned, wait_event_timeout ret = %u, cqp_head=%u," + " cqp.sq_head=%u, cqp.sq_tail=%u\n", + ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail); + if (!ret) { + nes_debug(NES_DBG_SHUTDOWN, "NIC QP%u destroy timeout expired\n", + nesvnic->nic.qp_id); + } + + pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase, + nesvnic->nic_pbase); +} + +/** + * nes_napi_isr + */ +int nes_napi_isr(struct nes_device *nesdev) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 int_stat; + + if (nesdev->napi_isr_ran) { + /* interrupt status has already been read in ISR */ + int_stat = nesdev->int_stat; + } else { + int_stat = nes_read32(nesdev->regs + NES_INT_STAT); + nesdev->int_stat = int_stat; + nesdev->napi_isr_ran = 1; + } + + int_stat &= nesdev->int_req; + /* iff NIC, process here, else wait for DPC */ + if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) { + nesdev->napi_isr_ran = 0; + nes_write32(nesdev->regs+NES_INT_STAT, + (int_stat & + ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3))); + + /* Process the CEQs */ + nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]); + + if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) && + (!nesadapter->et_use_adaptive_rx_coalesce)) || + ((nesadapter->et_use_adaptive_rx_coalesce) && + (nesdev->deepcq_count > nesadapter->et_pkt_rate_low)))) ) { + if ((nesdev->int_req & NES_INT_TIMER) == 0) { + /* Enable Periodic timer interrupts */ + nesdev->int_req |= NES_INT_TIMER; + /* ack any pending periodic timer interrupts so we don't get an immediate interrupt */ + /* TODO: need to also ack other unused periodic timer values, get from nesadapter */ + nes_write32(nesdev->regs+NES_TIMER_STAT, + nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req)); + nes_write32(nesdev->regs+NES_INTF_INT_MASK, + ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER)); + } + + if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) + { + nes_nic_init_timer(nesdev); + } + /* Enable interrupts, except CEQs */ + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); + } else { + /* Enable interrupts, make sure timer is off */ + nesdev->int_req &= ~NES_INT_TIMER; + nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); + nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + nesadapter->tune_timer.timer_in_use_old = 0; + } + nesdev->deepcq_count = 0; + return 1; + } else { + return 0; + } +} + + +/** + * nes_dpc + */ +void nes_dpc(unsigned long param) +{ + struct nes_device *nesdev = (struct nes_device *)param; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 counter; + u32 loop_counter = 0; + u32 int_status_bit; + u32 int_stat; + u32 timer_stat; + u32 temp_int_stat; + u32 intf_int_stat; + u32 debug_error; + u32 processed_intf_int = 0; + u16 processed_timer_int = 0; + u16 completion_ints = 0; + u16 timer_ints = 0; + + /* nes_debug(NES_DBG_ISR, "\n"); */ + + do { + timer_stat = 0; + if (nesdev->napi_isr_ran) { + nesdev->napi_isr_ran = 0; + int_stat = nesdev->int_stat; + } else + int_stat = nes_read32(nesdev->regs+NES_INT_STAT); + if (processed_intf_int != 0) + int_stat &= nesdev->int_req & ~NES_INT_INTF; + else + int_stat &= nesdev->int_req; + if (processed_timer_int == 0) { + processed_timer_int = 1; + if (int_stat & NES_INT_TIMER) { + timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT); + if ((timer_stat & nesdev->timer_int_req) == 0) { + int_stat &= ~NES_INT_TIMER; + } + } + } else { + int_stat &= ~NES_INT_TIMER; + } + + if (int_stat) { + if (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0| + NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)) { + /* Ack the interrupts */ + nes_write32(nesdev->regs+NES_INT_STAT, + (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0| + NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3))); + } + + temp_int_stat = int_stat; + for (counter = 0, int_status_bit = 1; counter < 16; counter++) { + if (int_stat & int_status_bit) { + nes_process_ceq(nesdev, &nesadapter->ceq[counter]); + temp_int_stat &= ~int_status_bit; + completion_ints = 1; + } + if (!(temp_int_stat & 0x0000ffff)) + break; + int_status_bit <<= 1; + } + + /* Process the AEQ for this pci function */ + int_status_bit = 1 << (16 + PCI_FUNC(nesdev->pcidev->devfn)); + if (int_stat & int_status_bit) { + nes_process_aeq(nesdev, &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)]); + } + + /* Process the MAC interrupt for this pci function */ + int_status_bit = 1 << (24 + nesdev->mac_index); + if (int_stat & int_status_bit) { + nes_process_mac_intr(nesdev, nesdev->mac_index); + } + + if (int_stat & NES_INT_TIMER) { + if (timer_stat & nesdev->timer_int_req) { + nes_write32(nesdev->regs + NES_TIMER_STAT, + (timer_stat & nesdev->timer_int_req) | + ~(nesdev->nesadapter->timer_int_req)); + timer_ints = 1; + } + } + + if (int_stat & NES_INT_INTF) { + processed_intf_int = 1; + intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT); + intf_int_stat &= nesdev->intf_int_req; + if (NES_INTF_INT_CRITERR & intf_int_stat) { + debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS); + printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n", + (u16)debug_error); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS, + 0x01010000 | (debug_error & 0x0000ffff)); + /* BUG(); */ + if (crit_err_count++ > 10) + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17); + } + if (NES_INTF_INT_PCIERR & intf_int_stat) { + printk(KERN_ERR PFX "PCI Error reported by device!!!\n"); + BUG(); + } + if (NES_INTF_INT_AEQ_OFLOW & intf_int_stat) { + printk(KERN_ERR PFX "AEQ Overflow reported by device!!!\n"); + BUG(); + } + nes_write32(nesdev->regs+NES_INTF_INT_STAT, intf_int_stat); + } + + if (int_stat & NES_INT_TSW) { + } + } + /* Don't use the interface interrupt bit stay in loop */ + int_stat &= ~NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0| + NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3; + } while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS)); + + if (timer_ints == 1) { + if ((nesadapter->et_rx_coalesce_usecs_irq) || (nesadapter->et_use_adaptive_rx_coalesce)) { + if (completion_ints == 0) { + nesdev->timer_only_int_count++; + if (nesdev->timer_only_int_count>=nesadapter->timer_int_limit) { + nesdev->timer_only_int_count = 0; + nesdev->int_req &= ~NES_INT_TIMER; + nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); + nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + nesdev->nesadapter->tune_timer.timer_in_use_old = 0; + } else { + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req)); + } + } else { + if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) + { + nes_nic_init_timer(nesdev); + } + nesdev->timer_only_int_count = 0; + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req)); + } + } else { + nesdev->timer_only_int_count = 0; + nesdev->int_req &= ~NES_INT_TIMER; + nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); + nes_write32(nesdev->regs+NES_TIMER_STAT, + nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req)); + nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + } + } else { + if ( (completion_ints == 1) && + (((nesadapter->et_rx_coalesce_usecs_irq) && + (!nesadapter->et_use_adaptive_rx_coalesce)) || + ((nesdev->deepcq_count > nesadapter->et_pkt_rate_low) && + (nesadapter->et_use_adaptive_rx_coalesce) )) ) { + /* nes_debug(NES_DBG_ISR, "Enabling periodic timer interrupt.\n" ); */ + nesdev->timer_only_int_count = 0; + nesdev->int_req |= NES_INT_TIMER; + nes_write32(nesdev->regs+NES_TIMER_STAT, + nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req)); + nes_write32(nesdev->regs+NES_INTF_INT_MASK, + ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER)); + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); + } else { + nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + } + } + nesdev->deepcq_count = 0; +} + + +/** + * nes_process_ceq + */ +void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq) +{ + u64 u64temp; + struct nes_hw_cq *cq; + u32 head; + u32 ceq_size; + + /* nes_debug(NES_DBG_CQ, "\n"); */ + head = ceq->ceq_head; + ceq_size = ceq->ceq_size; + + do { + if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) & + NES_CEQE_VALID) { + u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX])))<<32) | + ((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX]))); + u64temp <<= 1; + cq = *((struct nes_hw_cq **)&u64temp); + /* nes_debug(NES_DBG_CQ, "pCQ = %p\n", cq); */ + barrier(); + ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX] = 0; + + /* call the event handler */ + cq->ce_handler(nesdev, cq); + + if (++head >= ceq_size) + head = 0; + } else { + break; + } + + } while (1); + + ceq->ceq_head = head; +} + + +/** + * nes_process_aeq + */ +void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq) +{ +// u64 u64temp; + u32 head; + u32 aeq_size; + u32 aeqe_misc; + u32 aeqe_cq_id; + struct nes_hw_aeqe volatile *aeqe; + + head = aeq->aeq_head; + aeq_size = aeq->aeq_size; + + do { + aeqe = &aeq->aeq_vbase[head]; + if ((le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]) & NES_AEQE_VALID) == 0) + break; + aeqe_misc = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); + aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]); + if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) { + if (aeqe_cq_id >= NES_FIRST_QPN) { + /* dealing with an accelerated QP related AE */ +// u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])))<<32) | +// ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]))); + nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe); + } else { + /* TODO: dealing with a CQP related AE */ + nes_debug(NES_DBG_AEQ, "Processing CQP related AE, misc = 0x%04X\n", + (u16)(aeqe_misc >> 16)); + } + } + + aeqe->aeqe_words[NES_AEQE_MISC_IDX] = 0; + + if (++head >= aeq_size) + head = 0; + } + while (1); + aeq->aeq_head = head; +} + +static void nes_reset_link(struct nes_device *nesdev, u32 mac_index) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 reset_value; + u32 i=0; + u32 u32temp; + + if (nesadapter->hw_rev == NE020_REV) { + return; + } + mh_detected++; + + reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); + + if ((mac_index == 0) || ((mac_index == 1) && (nesadapter->OneG_Mode))) + reset_value |= 0x0000001d; + else + reset_value |= 0x0000002d; + + if (4 <= (nesadapter->link_interrupt_count[mac_index] / ((u16)NES_MAX_LINK_INTERRUPTS))) { + if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) { + nesadapter->link_interrupt_count[0] = 0; + nesadapter->link_interrupt_count[1] = 0; + u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); + if (0x00000040 & u32temp) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088); + else + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8); + + reset_value |= 0x0000003d; + } + nesadapter->link_interrupt_count[mac_index] = 0; + } + + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); + + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) + & 0x00000040) != 0x00000040) && (i++ < 5000)); + + if (0x0000003d == (reset_value & 0x0000003d)) { + u32 pcs_control_status0, pcs_control_status1; + + for (i = 0; i < 10; i++) { + pcs_control_status0 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0); + pcs_control_status1 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + if (((0x0F000000 == (pcs_control_status0 & 0x0F000000)) + && (pcs_control_status0 & 0x00100000)) + || ((0x0F000000 == (pcs_control_status1 & 0x0F000000)) + && (pcs_control_status1 & 0x00100000))) + continue; + else + break; + } + if (10 == i) { + u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); + if (0x00000040 & u32temp) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088); + else + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8); + + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); + + while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) + & 0x00000040) != 0x00000040) && (i++ < 5000)); + } + } +} + +/** + * nes_process_mac_intr + */ +void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) +{ + unsigned long flags; + u32 pcs_control_status; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_vnic *nesvnic; + u32 mac_status; + u32 mac_index = nesdev->mac_index; + u32 u32temp; + u16 phy_data; + u16 temp_phy_data; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) { + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + return; + } + nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_INTERRUPT; + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + + /* ack the MAC interrupt */ + mac_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200)); + /* Clear the interrupt */ + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200), mac_status); + + nes_debug(NES_DBG_PHY, "MAC%u interrupt status = 0x%X.\n", mac_number, mac_status); + + if (mac_status & (NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT)) { + nesdev->link_status_interrupts++; + if (0 == (++nesadapter->link_interrupt_count[mac_index] % ((u16)NES_MAX_LINK_INTERRUPTS))) { + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nes_reset_link(nesdev, mac_index); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + } + /* read the PHY interrupt status register */ + if (nesadapter->OneG_Mode) { + do { + nes_read_1G_phy_reg(nesdev, 0x1a, + nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1a = 0x%X.\n", + nesadapter->phy_index[mac_index], phy_data); + } while (phy_data&0x8000); + + temp_phy_data = 0; + do { + nes_read_1G_phy_reg(nesdev, 0x11, + nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy%d data from register 0x11 = 0x%X.\n", + nesadapter->phy_index[mac_index], phy_data); + if (temp_phy_data == phy_data) + break; + temp_phy_data = phy_data; + } while (1); + + nes_read_1G_phy_reg(nesdev, 0x1e, + nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1e = 0x%X.\n", + nesadapter->phy_index[mac_index], phy_data); + + nes_read_1G_phy_reg(nesdev, 1, + nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "1G phy%u data from register 1 = 0x%X\n", + nesadapter->phy_index[mac_index], phy_data); + + if (temp_phy_data & 0x1000) { + nes_debug(NES_DBG_PHY, "The Link is up according to the PHY\n"); + phy_data = 4; + } else { + nes_debug(NES_DBG_PHY, "The Link is down according to the PHY\n"); + } + } + nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n", + nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0), + nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200)); + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200)); + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200)); + nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n", + mac_index, pcs_control_status); + if (nesadapter->OneG_Mode) { + u32temp = 0x01010000; + if (nesadapter->port_count > 2) { + u32temp |= 0x02020000; + } + if ((pcs_control_status & u32temp)!= u32temp) { + phy_data = 0; + nes_debug(NES_DBG_PHY, "PCS says the link is down\n"); + } + } else if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) { + nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]); + temp_phy_data = (u16)nes_read_indexed(nesdev, + NES_IDX_MAC_MDIO_CONTROL); + u32temp = 20; + do { + nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]); + phy_data = (u16)nes_read_indexed(nesdev, + NES_IDX_MAC_MDIO_CONTROL); + if ((phy_data == temp_phy_data) || (!(--u32temp))) + break; + temp_phy_data = phy_data; + } while (1); + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", + __FUNCTION__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP"); + + } else { + phy_data = (0x0f0f0000 == (pcs_control_status & 0x0f1f0000)) ? 4 : 0; + } + + if (phy_data & 0x0004) { + nesadapter->mac_link_down[mac_index] = 0; + list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { + nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n", + nesvnic->linkup); + if (nesvnic->linkup == 0) { + printk(PFX "The Link is now up for port %u, netdev %p.\n", + mac_index, nesvnic->netdev); + if (netif_queue_stopped(nesvnic->netdev)) + netif_start_queue(nesvnic->netdev); + nesvnic->linkup = 1; + netif_carrier_on(nesvnic->netdev); + } + } + } else { + nesadapter->mac_link_down[mac_index] = 1; + list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { + nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n", + nesvnic->linkup); + if (nesvnic->linkup == 1) { + printk(PFX "The Link is now down for port %u, netdev %p.\n", + mac_index, nesvnic->netdev); + if (!(netif_queue_stopped(nesvnic->netdev))) + netif_stop_queue(nesvnic->netdev); + nesvnic->linkup = 0; + netif_carrier_off(nesvnic->netdev); + } + } + } + } + + nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_IDLE; +} + + + +void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) +{ + struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq); + + netif_rx_schedule(nesdev->netdev[nesvnic->netdev_index], &nesvnic->napi); +} + + +/* The MAX_RQES_TO_PROCESS defines how many max read requests to complete before +* getting out of nic_ce_handler +*/ +#define MAX_RQES_TO_PROCESS 384 + +/** + * nes_nic_ce_handler + */ +void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) +{ + u64 u64temp; + dma_addr_t bus_address; + struct nes_hw_nic *nesnic; + struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq); + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_nic_rq_wqe *nic_rqe; + struct nes_hw_nic_sq_wqe *nic_sqe; + struct sk_buff *skb; + struct sk_buff *rx_skb; + __le16 *wqe_fragment_length; + u32 head; + u32 cq_size; + u32 rx_pkt_size; + u32 cqe_count=0; + u32 cqe_errv; + u32 cqe_misc; + u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */ + u16 vlan_tag; + u16 pkt_type; + u16 rqes_processed = 0; + u8 sq_cqes = 0; + + head = cq->cq_head; + cq_size = cq->cq_size; + cq->cqes_pending = 1; + do { + if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) & + NES_NIC_CQE_VALID) { + nesnic = &nesvnic->nic; + cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]); + if (cqe_misc & NES_NIC_CQE_SQ) { + sq_cqes++; + wqe_fragment_index = 1; + nic_sqe = &nesnic->sq_vbase[nesnic->sq_tail]; + skb = nesnic->tx_skb[nesnic->sq_tail]; + wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; + /* bump past the vlan tag */ + wqe_fragment_length++; + if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) { + u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]); + u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32; + bus_address = (dma_addr_t)u64temp; + if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) { + pci_unmap_single(nesdev->pcidev, + bus_address, + le16_to_cpu(wqe_fragment_length[wqe_fragment_index++]), + PCI_DMA_TODEVICE); + } + for (; wqe_fragment_index < 5; wqe_fragment_index++) { + if (wqe_fragment_length[wqe_fragment_index]) { + u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]); + u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32; + bus_address = (dma_addr_t)u64temp; + pci_unmap_page(nesdev->pcidev, + bus_address, + le16_to_cpu(wqe_fragment_length[wqe_fragment_index]), + PCI_DMA_TODEVICE); + } else + break; + } + if (skb) + dev_kfree_skb_any(skb); + } + nesnic->sq_tail++; + nesnic->sq_tail &= nesnic->sq_size-1; + if (sq_cqes > 128) { + barrier(); + /* restart the queue if it had been stopped */ + if (netif_queue_stopped(nesvnic->netdev)) + netif_wake_queue(nesvnic->netdev); + sq_cqes = 0; + } + } else { + rqes_processed ++; + + cq->rx_cqes_completed++; + cq->rx_pkts_indicated++; + rx_pkt_size = cqe_misc & 0x0000ffff; + nic_rqe = &nesnic->rq_vbase[nesnic->rq_tail]; + /* Get the skb */ + rx_skb = nesnic->rx_skb[nesnic->rq_tail]; + nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_tail]; + bus_address = (dma_addr_t)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); + bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; + pci_unmap_single(nesdev->pcidev, bus_address, + nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + /* rx_skb->tail = rx_skb->data + rx_pkt_size; */ + /* rx_skb->len = rx_pkt_size; */ + rx_skb->len = 0; /* TODO: see if this is necessary */ + skb_put(rx_skb, rx_pkt_size); + rx_skb->protocol = eth_type_trans(rx_skb, nesvnic->netdev); + nesnic->rq_tail++; + nesnic->rq_tail &= nesnic->rq_size - 1; + + atomic_inc(&nesvnic->rx_skbs_needed); + if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) { + nes_write32(nesdev->regs+NES_CQE_ALLOC, + cq->cq_number | (cqe_count << 16)); +// nesadapter->tune_timer.cq_count += cqe_count; + nesdev->currcq_count += cqe_count; + cqe_count = 0; + nes_replenish_nic_rq(nesvnic); + } + pkt_type = (u16)(le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])); + cqe_errv = (cqe_misc & NES_NIC_CQE_ERRV_MASK) >> NES_NIC_CQE_ERRV_SHIFT; + rx_skb->ip_summed = CHECKSUM_NONE; + + if ((NES_PKT_TYPE_TCPV4_BITS == (pkt_type & NES_PKT_TYPE_TCPV4_MASK)) || + (NES_PKT_TYPE_UDPV4_BITS == (pkt_type & NES_PKT_TYPE_UDPV4_MASK))) { + if ((cqe_errv & + (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR | + NES_NIC_ERRV_BITS_IPH_ERR | NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) { + if (nesvnic->rx_checksum_disabled == 0) { + rx_skb->ip_summed = CHECKSUM_UNNECESSARY; + } + } else + nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet." + " errv = 0x%X, pkt_type = 0x%X.\n", + nesvnic->netdev->name, cqe_errv, pkt_type); + + } else if ((pkt_type & NES_PKT_TYPE_IPV4_MASK) == NES_PKT_TYPE_IPV4_BITS) { + if ((cqe_errv & + (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_IPH_ERR | + NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) { + if (nesvnic->rx_checksum_disabled == 0) { + rx_skb->ip_summed = CHECKSUM_UNNECESSARY; + /* nes_debug(NES_DBG_CQ, "%s: Reporting successfully checksummed IPv4 packet.\n", + nesvnic->netdev->name); */ + } + } else + nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet." + " errv = 0x%X, pkt_type = 0x%X.\n", + nesvnic->netdev->name, cqe_errv, pkt_type); + } + /* nes_debug(NES_DBG_CQ, "pkt_type=%x, APBVT_MASK=%x\n", + pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */ + + if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) { + nes_cm_recv(rx_skb, nesvnic->netdev); + } else { + if ((cqe_misc & NES_NIC_CQE_TAG_VALID) && (nesvnic->vlan_grp != NULL)) { + vlan_tag = (u16)(le32_to_cpu( + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX]) + >> 16); + nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n", + nesvnic->netdev->name, vlan_tag); + nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag); + } else { + nes_netif_rx(rx_skb); + } + } + + nesvnic->netdev->last_rx = jiffies; + /* nesvnic->netstats.rx_packets++; */ + /* nesvnic->netstats.rx_bytes += rx_pkt_size; */ + } + + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0; + /* Accounting... */ + cqe_count++; + if (++head >= cq_size) + head = 0; + if (cqe_count == 255) { + /* Replenish Nic CQ */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, + cq->cq_number | (cqe_count << 16)); +// nesdev->nesadapter->tune_timer.cq_count += cqe_count; + nesdev->currcq_count += cqe_count; + cqe_count = 0; + } + + if (cq->rx_cqes_completed >= nesvnic->budget) + break; + } else { + cq->cqes_pending = 0; + break; + } + + } while (1); + + if (sq_cqes) { + barrier(); + /* restart the queue if it had been stopped */ + if (netif_queue_stopped(nesvnic->netdev)) + netif_wake_queue(nesvnic->netdev); + } + + cq->cq_head = head; + /* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n", + cq->cq_number, cqe_count, cq->cq_head); */ + cq->cqe_allocs_pending = cqe_count; + if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) + { +// nesdev->nesadapter->tune_timer.cq_count += cqe_count; + nesdev->currcq_count += cqe_count; + nes_nic_tune_timer(nesdev); + } + if (atomic_read(&nesvnic->rx_skbs_needed)) + nes_replenish_nic_rq(nesvnic); + } + + +/** + * nes_cqp_ce_handler + */ +void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) +{ + u64 u64temp; + unsigned long flags; + struct nes_hw_cqp *cqp = NULL; + struct nes_cqp_request *cqp_request; + struct nes_hw_cqp_wqe *cqp_wqe; + u32 head; + u32 cq_size; + u32 cqe_count=0; + u32 error_code; + /* u32 counter; */ + + head = cq->cq_head; + cq_size = cq->cq_size; + + do { + /* process the CQE */ + /* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head, + le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */ + + if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) { + u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head]. + cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) | + ((u64)(le32_to_cpu(cq->cq_vbase[head]. + cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]))); + cqp = *((struct nes_hw_cqp **)&u64temp); + + error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]); + if (error_code) { + nes_debug(NES_DBG_CQP, "Bad Completion code for opcode 0x%02X from CQP," + " Major/Minor codes = 0x%04X:%04X.\n", + le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f, + (u16)(error_code >> 16), + (u16)error_code); + nes_debug(NES_DBG_CQP, "cqp: qp_id=%u, sq_head=%u, sq_tail=%u\n", + cqp->qp_id, cqp->sq_head, cqp->sq_tail); + } + + u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail]. + wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX])))<<32) | + ((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail]. + wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX]))); + cqp_request = *((struct nes_cqp_request **)&u64temp); + if (cqp_request) { + if (cqp_request->waiting) { + /* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */ + cqp_request->major_code = (u16)(error_code >> 16); + cqp_request->minor_code = (u16)error_code; + barrier(); + cqp_request->request_done = 1; + wake_up(&cqp_request->waitq); + if (atomic_dec_and_test(&cqp_request->refcount)) { + nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", + cqp_request, + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f); + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + } else if (cqp_request->callback) { + /* Envoke the callback routine */ + cqp_request->cqp_callback(nesdev, cqp_request); + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } else { + nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", + cqp_request, + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f); + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + } else { + wake_up(&nesdev->cqp.waitq); + } + + cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; + nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (1 << 16)); + if (++cqp->sq_tail >= cqp->sq_size) + cqp->sq_tail = 0; + + /* Accounting... */ + cqe_count++; + if (++head >= cq_size) + head = 0; + } else { + break; + } + } while (1); + cq->cq_head = head; + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + while ((!list_empty(&nesdev->cqp_pending_reqs)) && + ((((nesdev->cqp.sq_tail+nesdev->cqp.sq_size)-nesdev->cqp.sq_head) & + (nesdev->cqp.sq_size - 1)) != 1)) { + cqp_request = list_entry(nesdev->cqp_pending_reqs.next, + struct nes_cqp_request, list); + list_del_init(&cqp_request->list); + head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[head]; + memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); + barrier(); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = + cpu_to_le32((u32)((unsigned long)cqp_request)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = + cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request))); + nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n", + cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head); + /* Ring doorbell (1 WQEs) */ + barrier(); + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); + } + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + + /* Arm the CCQ */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + cq->cq_number); + nes_read32(nesdev->regs+NES_CQE_ALLOC); +} + + +/** + * nes_process_iwarp_aeqe + */ +void nes_process_iwarp_aeqe(struct nes_device *nesdev, struct nes_hw_aeqe *aeqe) +{ + u64 context; + u64 aeqe_context = 0; + unsigned long flags; + struct nes_qp *nesqp; + int resource_allocated; + /* struct iw_cm_id *cm_id; */ + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct ib_event ibevent; + /* struct iw_cm_event cm_event; */ + u32 aeq_info; + u32 next_iwarp_state = 0; + u16 async_event_id; + u8 tcp_state; + u8 iwarp_state; + + nes_debug(NES_DBG_AEQ, "\n"); + aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); + if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) { + context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); + context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32; + } else { + aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); + aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32; + context = (unsigned long)nesadapter->qp_table[le32_to_cpu( + aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN]; + BUG_ON(!context); + } + + async_event_id = (u16)aeq_info; + tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; + iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; + nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p," + " Tcp state = %s, iWARP state = %s\n", + async_event_id, + le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe, + nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]); + + + switch (async_event_id) { + case NES_AEQE_AEID_LLP_FIN_RECEIVED: + nesqp = *((struct nes_qp **)&context); + if (atomic_inc_return(&nesqp->close_timer_started) == 1) { + nesqp->cm_id->add_ref(nesqp->cm_id); + nes_add_ref(&nesqp->ibqp); + schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp, + NES_TIMER_TYPE_CLOSE, 1, 0); + nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d)," + " need ae to finish up, original_last_aeq = 0x%04X." + " last_aeq = 0x%04X, scheduling timer. TCP state = %d\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + async_event_id, nesqp->last_aeq, tcp_state); + } + if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) || + (nesqp->ibqp_state != IB_QPS_RTS)) { + /* FIN Received but tcp state or IB state moved on, + should expect a close complete */ + return; + } + case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: + case NES_AEQE_AEID_LLP_CONNECTION_RESET: + case NES_AEQE_AEID_TERMINATE_SENT: + case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE: + case NES_AEQE_AEID_RESET_SENT: + nesqp = *((struct nes_qp **)&context); + if (async_event_id == NES_AEQE_AEID_RESET_SENT) { + tcp_state = NES_AEQE_TCP_STATE_CLOSED; + } + nes_add_ref(&nesqp->ibqp); + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + + if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) || + (tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) { + nesqp->hte_added = 0; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u to remove hte\n", + nesqp->hwqp.qp_id); + nes_hw_modify_qp(nesdev, nesqp, + NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE, 0); + spin_lock_irqsave(&nesqp->lock, flags); + } + + if ((nesqp->ibqp_state == IB_QPS_RTS) && + ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || + (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + switch (nesqp->hw_iwarp_state) { + case NES_AEQE_IWARP_STATE_RTS: + next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; + break; + case NES_AEQE_IWARP_STATE_TERMINATE: + next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE; + if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { + next_iwarp_state |= 0x02000000; + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + } + break; + default: + next_iwarp_state = 0; + } + spin_unlock_irqrestore(&nesqp->lock, flags); + if (next_iwarp_state) { + nes_add_ref(&nesqp->ibqp); + nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X," + " also added another reference\n", + nesqp->hwqp.qp_id, next_iwarp_state); + nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0); + } + nes_cm_disconn(nesqp); + } else { + if (async_event_id == NES_AEQE_AEID_LLP_FIN_RECEIVED) { + /* FIN Received but ib state not RTS, + close complete will be on its way */ + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_rem_ref(&nesqp->ibqp); + return; + } + spin_unlock_irqrestore(&nesqp->lock, flags); + if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { + next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000; + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X," + " also added another reference\n", + nesqp->hwqp.qp_id, next_iwarp_state); + nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0); + } + nes_cm_disconn(nesqp); + } + break; + case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED: + nesqp = *((struct nes_qp **)&context); + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TERMINATE_RECEIVED" + " event on QP%u \n Q2 Data:\n", + nesqp->hwqp.qp_id); + if (nesqp->ibqp.event_handler) { + ibevent.device = nesqp->ibqp.device; + ibevent.element.qp = &nesqp->ibqp; + ibevent.event = IB_EVENT_QP_FATAL; + nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + } + if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || + ((nesqp->ibqp_state == IB_QPS_RTS)&& + (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + nes_add_ref(&nesqp->ibqp); + nes_cm_disconn(nesqp); + } else { + nesqp->in_disconnect = 0; + wake_up(&nesqp->kick_waitq); + } + break; + case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES: + nesqp = *((struct nes_qp **)&context); + nes_add_ref(&nesqp->ibqp); + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR; + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = async_event_id; + if (nesqp->cm_id) { + nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES" + " event on QP%u, remote IP = 0x%08X \n", + nesqp->hwqp.qp_id, + ntohl(nesqp->cm_id->remote_addr.sin_addr.s_addr)); + } else { + nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES" + " event on QP%u \n", + nesqp->hwqp.qp_id); + } + spin_unlock_irqrestore(&nesqp->lock, flags); + next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_RESET; + nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0); + if (nesqp->ibqp.event_handler) { + ibevent.device = nesqp->ibqp.device; + ibevent.element.qp = &nesqp->ibqp; + ibevent.event = IB_EVENT_QP_FATAL; + nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + } + break; + case NES_AEQE_AEID_AMP_BAD_STAG_INDEX: + if (NES_AEQE_INBOUND_RDMA&aeq_info) { + nesqp = nesadapter->qp_table[le32_to_cpu( + aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN]; + } else { + /* TODO: get the actual WQE and mask off wqe index */ + context &= ~((u64)511); + nesqp = *((struct nes_qp **)&context); + } + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_BAD_STAG_INDEX event on QP%u\n", + nesqp->hwqp.qp_id); + if (nesqp->ibqp.event_handler) { + ibevent.device = nesqp->ibqp.device; + ibevent.element.qp = &nesqp->ibqp; + ibevent.event = IB_EVENT_QP_ACCESS_ERR; + nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + } + break; + case NES_AEQE_AEID_AMP_UNALLOCATED_STAG: + nesqp = *((struct nes_qp **)&context); + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_UNALLOCATED_STAG event on QP%u\n", + nesqp->hwqp.qp_id); + if (nesqp->ibqp.event_handler) { + ibevent.device = nesqp->ibqp.device; + ibevent.element.qp = &nesqp->ibqp; + ibevent.event = IB_EVENT_QP_ACCESS_ERR; + nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + } + break; + case NES_AEQE_AEID_PRIV_OPERATION_DENIED: + nesqp = nesadapter->qp_table[le32_to_cpu(aeqe->aeqe_words + [NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN]; + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_PRIV_OPERATION_DENIED event on QP%u," + " nesqp = %p, AE reported %p\n", + nesqp->hwqp.qp_id, nesqp, *((struct nes_qp **)&context)); + if (nesqp->ibqp.event_handler) { + ibevent.device = nesqp->ibqp.device; + ibevent.element.qp = &nesqp->ibqp; + ibevent.event = IB_EVENT_QP_ACCESS_ERR; + nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + } + break; + case NES_AEQE_AEID_CQ_OPERATION_ERROR: + context <<= 1; + nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n", + le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), (void *)(unsigned long)context); + resource_allocated = nes_is_resource_allocated(nesadapter, nesadapter->allocated_cqs, + le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])); + if (resource_allocated) { + printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n", + __FUNCTION__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])); + } + break; + case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: + nesqp = nesadapter->qp_table[le32_to_cpu( + aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN]; + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG" + "_FOR_AVAILABLE_BUFFER event on QP%u\n", + nesqp->hwqp.qp_id); + if (nesqp->ibqp.event_handler) { + ibevent.device = nesqp->ibqp.device; + ibevent.element.qp = &nesqp->ibqp; + ibevent.event = IB_EVENT_QP_ACCESS_ERR; + nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + } + /* tell cm to disconnect, cm will queue work to thread */ + nes_add_ref(&nesqp->ibqp); + nes_cm_disconn(nesqp); + break; + case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: + nesqp = *((struct nes_qp **)&context); + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_INVALID_MSN" + "_NO_BUFFER_AVAILABLE event on QP%u\n", + nesqp->hwqp.qp_id); + if (nesqp->ibqp.event_handler) { + ibevent.device = nesqp->ibqp.device; + ibevent.element.qp = &nesqp->ibqp; + ibevent.event = IB_EVENT_QP_FATAL; + nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + } + /* tell cm to disconnect, cm will queue work to thread */ + nes_add_ref(&nesqp->ibqp); + nes_cm_disconn(nesqp); + break; + case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR: + nesqp = *((struct nes_qp **)&context); + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR" + " event on QP%u \n Q2 Data:\n", + nesqp->hwqp.qp_id); + if (nesqp->ibqp.event_handler) { + ibevent.device = nesqp->ibqp.device; + ibevent.element.qp = &nesqp->ibqp; + ibevent.event = IB_EVENT_QP_FATAL; + nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + } + /* tell cm to disconnect, cm will queue work to thread */ + nes_add_ref(&nesqp->ibqp); + nes_cm_disconn(nesqp); + break; + /* TODO: additional AEs need to be here */ + default: + nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n", + async_event_id); + break; + } + +} + + +/** + * nes_iwarp_ce_handler + */ +void nes_iwarp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *hw_cq) +{ + struct nes_cq *nescq = container_of(hw_cq, struct nes_cq, hw_cq); + + /* nes_debug(NES_DBG_CQ, "Processing completion event for iWARP CQ%u.\n", + nescq->hw_cq.cq_number); */ + nes_write32(nesdev->regs+NES_CQ_ACK, nescq->hw_cq.cq_number); + + if (nescq->ibcq.comp_handler) + nescq->ibcq.comp_handler(&nescq->ibcq, nescq->ibcq.cq_context); + + return; +} + + +/** + * nes_manage_apbvt() + */ +int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, + u32 nic_index, u32 add_port) +{ + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_hw_cqp_wqe *cqp_wqe; + unsigned long flags; + struct nes_cqp_request *cqp_request; + int ret = 0; + u16 major_code; + + /* Send manage APBVT request to CQP */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + nes_debug(NES_DBG_QP, "%s APBV for local port=%u(0x%04x), nic_index=%u\n", + (add_port == NES_MANAGE_APBVT_ADD) ? "ADD" : "DEL", + accel_local_port, accel_local_port, nic_index); + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, (NES_CQP_MANAGE_APBVT | + ((add_port == NES_MANAGE_APBVT_ADD) ? NES_CQP_APBVT_ADD : 0))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + ((nic_index << NES_CQP_APBVT_NIC_SHIFT) | accel_local_port)); + + nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n"); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + + if (add_port == NES_MANAGE_APBVT_ADD) + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n", + ret, cqp_request->major_code, cqp_request->minor_code); + major_code = cqp_request->major_code; + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + if (!ret) + return -ETIME; + else if (major_code) + return -EIO; + else + return 0; +} + + +/** + * nes_manage_arp_cache + */ +void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, + u32 ip_addr, u32 action) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev; + struct nes_cqp_request *cqp_request; + int arp_index; + + nesdev = nesvnic->nesdev; + arp_index = nes_arp_table(nesdev, ip_addr, mac_addr, action); + if (arp_index == -1) { + return; + } + + /* update the ARP entry */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_NETDEV, "Failed to get a cqp_request.\n"); + return; + } + cqp_request->waiting = 0; + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( + NES_CQP_MANAGE_ARP_CACHE | NES_CQP_ARP_PERM); + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32( + (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_CQP_ARP_AEQ_INDEX_SHIFT); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(arp_index); + + if (action == NES_ARP_ADD) { + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_ARP_VALID); + cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32( + (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) | + (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]); + cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32( + (((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]); + } else { + cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = 0; + } + + nes_debug(NES_DBG_NETDEV, "Not waiting for CQP, cqp.sq_head=%u, cqp.sq_tail=%u\n", + nesdev->cqp.sq_head, nesdev->cqp.sq_tail); + + atomic_set(&cqp_request->refcount, 1); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); +} + + +/** + * flush_wqes + */ +void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, + u32 which_wq, u32 wait_completion) +{ + unsigned long flags; + struct nes_cqp_request *cqp_request; + struct nes_hw_cqp_wqe *cqp_wqe; + int ret; + + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); + return; + } + if (wait_completion) { + cqp_request->waiting = 1; + atomic_set(&cqp_request->refcount, 2); + } else { + cqp_request->waiting = 0; + } + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = + cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id); + + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + + if (wait_completion) { + /* Wait for CQP */ + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u," + " CQP Major:Minor codes = 0x%04X:0x%04X\n", + ret, cqp_request->major_code, cqp_request->minor_code); + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + } +} diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h new file mode 100644 index 000000000000..1e10df550c9e --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -0,0 +1,1206 @@ +/* +* Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#ifndef __NES_HW_H +#define __NES_HW_H + +#define NES_PHY_TYPE_1G 2 +#define NES_PHY_TYPE_IRIS 3 +#define NES_PHY_TYPE_PUMA_10G 6 + +#define NES_MULTICAST_PF_MAX 8 + +enum pci_regs { + NES_INT_STAT = 0x0000, + NES_INT_MASK = 0x0004, + NES_INT_PENDING = 0x0008, + NES_INTF_INT_STAT = 0x000C, + NES_INTF_INT_MASK = 0x0010, + NES_TIMER_STAT = 0x0014, + NES_PERIODIC_CONTROL = 0x0018, + NES_ONE_SHOT_CONTROL = 0x001C, + NES_EEPROM_COMMAND = 0x0020, + NES_EEPROM_DATA = 0x0024, + NES_FLASH_COMMAND = 0x0028, + NES_FLASH_DATA = 0x002C, + NES_SOFTWARE_RESET = 0x0030, + NES_CQ_ACK = 0x0034, + NES_WQE_ALLOC = 0x0040, + NES_CQE_ALLOC = 0x0044, +}; + +enum indexed_regs { + NES_IDX_CREATE_CQP_LOW = 0x0000, + NES_IDX_CREATE_CQP_HIGH = 0x0004, + NES_IDX_QP_CONTROL = 0x0040, + NES_IDX_FLM_CONTROL = 0x0080, + NES_IDX_INT_CPU_STATUS = 0x00a0, + NES_IDX_GPIO_CONTROL = 0x00f0, + NES_IDX_GPIO_DATA = 0x00f4, + NES_IDX_TCP_CONFIG0 = 0x01e4, + NES_IDX_TCP_TIMER_CONFIG = 0x01ec, + NES_IDX_TCP_NOW = 0x01f0, + NES_IDX_QP_MAX_CFG_SIZES = 0x0200, + NES_IDX_QP_CTX_SIZE = 0x0218, + NES_IDX_TCP_TIMER_SIZE0 = 0x0238, + NES_IDX_TCP_TIMER_SIZE1 = 0x0240, + NES_IDX_ARP_CACHE_SIZE = 0x0258, + NES_IDX_CQ_CTX_SIZE = 0x0260, + NES_IDX_MRT_SIZE = 0x0278, + NES_IDX_PBL_REGION_SIZE = 0x0280, + NES_IDX_IRRQ_COUNT = 0x02b0, + NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x02f0, + NES_IDX_RX_WINDOW_BUFFER_SIZE = 0x0300, + NES_IDX_DST_IP_ADDR = 0x0400, + NES_IDX_PCIX_DIAG = 0x08e8, + NES_IDX_MPP_DEBUG = 0x0a00, + NES_IDX_PORT_RX_DISCARDS = 0x0a30, + NES_IDX_PORT_TX_DISCARDS = 0x0a34, + NES_IDX_MPP_LB_DEBUG = 0x0b00, + NES_IDX_DENALI_CTL_22 = 0x1058, + NES_IDX_MAC_TX_CONTROL = 0x2000, + NES_IDX_MAC_TX_CONFIG = 0x2004, + NES_IDX_MAC_TX_PAUSE_QUANTA = 0x2008, + NES_IDX_MAC_RX_CONTROL = 0x200c, + NES_IDX_MAC_RX_CONFIG = 0x2010, + NES_IDX_MAC_EXACT_MATCH_BOTTOM = 0x201c, + NES_IDX_MAC_MDIO_CONTROL = 0x2084, + NES_IDX_MAC_TX_OCTETS_LOW = 0x2100, + NES_IDX_MAC_TX_OCTETS_HIGH = 0x2104, + NES_IDX_MAC_TX_FRAMES_LOW = 0x2108, + NES_IDX_MAC_TX_FRAMES_HIGH = 0x210c, + NES_IDX_MAC_TX_PAUSE_FRAMES = 0x2118, + NES_IDX_MAC_TX_ERRORS = 0x2138, + NES_IDX_MAC_RX_OCTETS_LOW = 0x213c, + NES_IDX_MAC_RX_OCTETS_HIGH = 0x2140, + NES_IDX_MAC_RX_FRAMES_LOW = 0x2144, + NES_IDX_MAC_RX_FRAMES_HIGH = 0x2148, + NES_IDX_MAC_RX_BC_FRAMES_LOW = 0x214c, + NES_IDX_MAC_RX_MC_FRAMES_HIGH = 0x2150, + NES_IDX_MAC_RX_PAUSE_FRAMES = 0x2154, + NES_IDX_MAC_RX_SHORT_FRAMES = 0x2174, + NES_IDX_MAC_RX_OVERSIZED_FRAMES = 0x2178, + NES_IDX_MAC_RX_JABBER_FRAMES = 0x217c, + NES_IDX_MAC_RX_CRC_ERR_FRAMES = 0x2180, + NES_IDX_MAC_RX_LENGTH_ERR_FRAMES = 0x2184, + NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES = 0x2188, + NES_IDX_MAC_INT_STATUS = 0x21f0, + NES_IDX_MAC_INT_MASK = 0x21f4, + NES_IDX_PHY_PCS_CONTROL_STATUS0 = 0x2800, + NES_IDX_PHY_PCS_CONTROL_STATUS1 = 0x2a00, + NES_IDX_ETH_SERDES_COMMON_CONTROL0 = 0x2808, + NES_IDX_ETH_SERDES_COMMON_CONTROL1 = 0x2a08, + NES_IDX_ETH_SERDES_COMMON_STATUS0 = 0x280c, + NES_IDX_ETH_SERDES_COMMON_STATUS1 = 0x2a0c, + NES_IDX_ETH_SERDES_TX_EMP0 = 0x2810, + NES_IDX_ETH_SERDES_TX_EMP1 = 0x2a10, + NES_IDX_ETH_SERDES_TX_DRIVE0 = 0x2814, + NES_IDX_ETH_SERDES_TX_DRIVE1 = 0x2a14, + NES_IDX_ETH_SERDES_RX_MODE0 = 0x2818, + NES_IDX_ETH_SERDES_RX_MODE1 = 0x2a18, + NES_IDX_ETH_SERDES_RX_SIGDET0 = 0x281c, + NES_IDX_ETH_SERDES_RX_SIGDET1 = 0x2a1c, + NES_IDX_ETH_SERDES_BYPASS0 = 0x2820, + NES_IDX_ETH_SERDES_BYPASS1 = 0x2a20, + NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0 = 0x2824, + NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1 = 0x2a24, + NES_IDX_ETH_SERDES_RX_EQ_CONTROL0 = 0x2828, + NES_IDX_ETH_SERDES_RX_EQ_CONTROL1 = 0x2a28, + NES_IDX_ETH_SERDES_RX_EQ_STATUS0 = 0x282c, + NES_IDX_ETH_SERDES_RX_EQ_STATUS1 = 0x2a2c, + NES_IDX_ETH_SERDES_CDR_RESET0 = 0x2830, + NES_IDX_ETH_SERDES_CDR_RESET1 = 0x2a30, + NES_IDX_ETH_SERDES_CDR_CONTROL0 = 0x2834, + NES_IDX_ETH_SERDES_CDR_CONTROL1 = 0x2a34, + NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0 = 0x2838, + NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1 = 0x2a38, + NES_IDX_ENDNODE0_NSTAT_RX_DISCARD = 0x3080, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO = 0x3000, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI = 0x3004, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO = 0x3008, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI = 0x300c, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO = 0x7000, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c, + NES_IDX_CM_CONFIG = 0x5100, + NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000, + NES_IDX_NIC_PHYPORT_TO_USW = 0x6008, + NES_IDX_NIC_ACTIVE = 0x6010, + NES_IDX_NIC_UNICAST_ALL = 0x6018, + NES_IDX_NIC_MULTICAST_ALL = 0x6020, + NES_IDX_NIC_MULTICAST_ENABLE = 0x6028, + NES_IDX_NIC_BROADCAST_ON = 0x6030, + NES_IDX_USED_CHUNKS_TX = 0x60b0, + NES_IDX_TX_POOL_SIZE = 0x60b8, + NES_IDX_QUAD_HASH_TABLE_SIZE = 0x6148, + NES_IDX_PERFECT_FILTER_LOW = 0x6200, + NES_IDX_PERFECT_FILTER_HIGH = 0x6204, + NES_IDX_IPV4_TCP_REXMITS = 0x7080, + NES_IDX_DEBUG_ERROR_CONTROL_STATUS = 0x913c, + NES_IDX_DEBUG_ERROR_MASKS0 = 0x9140, + NES_IDX_DEBUG_ERROR_MASKS1 = 0x9144, + NES_IDX_DEBUG_ERROR_MASKS2 = 0x9148, + NES_IDX_DEBUG_ERROR_MASKS3 = 0x914c, + NES_IDX_DEBUG_ERROR_MASKS4 = 0x9150, + NES_IDX_DEBUG_ERROR_MASKS5 = 0x9154, +}; + +#define NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE 1 +#define NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE (1 << 17) + +enum nes_cqp_opcodes { + NES_CQP_CREATE_QP = 0x00, + NES_CQP_MODIFY_QP = 0x01, + NES_CQP_DESTROY_QP = 0x02, + NES_CQP_CREATE_CQ = 0x03, + NES_CQP_MODIFY_CQ = 0x04, + NES_CQP_DESTROY_CQ = 0x05, + NES_CQP_ALLOCATE_STAG = 0x09, + NES_CQP_REGISTER_STAG = 0x0a, + NES_CQP_QUERY_STAG = 0x0b, + NES_CQP_REGISTER_SHARED_STAG = 0x0c, + NES_CQP_DEALLOCATE_STAG = 0x0d, + NES_CQP_MANAGE_ARP_CACHE = 0x0f, + NES_CQP_SUSPEND_QPS = 0x11, + NES_CQP_UPLOAD_CONTEXT = 0x13, + NES_CQP_CREATE_CEQ = 0x16, + NES_CQP_DESTROY_CEQ = 0x18, + NES_CQP_CREATE_AEQ = 0x19, + NES_CQP_DESTROY_AEQ = 0x1b, + NES_CQP_LMI_ACCESS = 0x20, + NES_CQP_FLUSH_WQES = 0x22, + NES_CQP_MANAGE_APBVT = 0x23 +}; + +enum nes_cqp_wqe_word_idx { + NES_CQP_WQE_OPCODE_IDX = 0, + NES_CQP_WQE_ID_IDX = 1, + NES_CQP_WQE_COMP_CTX_LOW_IDX = 2, + NES_CQP_WQE_COMP_CTX_HIGH_IDX = 3, + NES_CQP_WQE_COMP_SCRATCH_LOW_IDX = 4, + NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5, +}; + +enum nes_cqp_cq_wqeword_idx { + NES_CQP_CQ_WQE_PBL_LOW_IDX = 6, + NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7, + NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX = 8, + NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX = 9, + NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX = 10, +}; + +enum nes_cqp_stag_wqeword_idx { + NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX = 1, + NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX = 6, + NES_CQP_STAG_WQE_LEN_LOW_IDX = 7, + NES_CQP_STAG_WQE_STAG_IDX = 8, + NES_CQP_STAG_WQE_VA_LOW_IDX = 10, + NES_CQP_STAG_WQE_VA_HIGH_IDX = 11, + NES_CQP_STAG_WQE_PA_LOW_IDX = 12, + NES_CQP_STAG_WQE_PA_HIGH_IDX = 13, + NES_CQP_STAG_WQE_PBL_LEN_IDX = 14 +}; + +#define NES_CQP_OP_IWARP_STATE_SHIFT 28 + +enum nes_cqp_qp_bits { + NES_CQP_QP_ARP_VALID = (1<<8), + NES_CQP_QP_WINBUF_VALID = (1<<9), + NES_CQP_QP_CONTEXT_VALID = (1<<10), + NES_CQP_QP_ORD_VALID = (1<<11), + NES_CQP_QP_WINBUF_DATAIND_EN = (1<<12), + NES_CQP_QP_VIRT_WQS = (1<<13), + NES_CQP_QP_DEL_HTE = (1<<14), + NES_CQP_QP_CQS_VALID = (1<<15), + NES_CQP_QP_TYPE_TSA = 0, + NES_CQP_QP_TYPE_IWARP = (1<<16), + NES_CQP_QP_TYPE_CQP = (4<<16), + NES_CQP_QP_TYPE_NIC = (5<<16), + NES_CQP_QP_MSS_CHG = (1<<20), + NES_CQP_QP_STATIC_RESOURCES = (1<<21), + NES_CQP_QP_IGNORE_MW_BOUND = (1<<22), + NES_CQP_QP_VWQ_USE_LMI = (1<<23), + NES_CQP_QP_IWARP_STATE_IDLE = (1<<NES_CQP_OP_IWARP_STATE_SHIFT), + NES_CQP_QP_IWARP_STATE_RTS = (2<<NES_CQP_OP_IWARP_STATE_SHIFT), + NES_CQP_QP_IWARP_STATE_CLOSING = (3<<NES_CQP_OP_IWARP_STATE_SHIFT), + NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT), + NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT), + NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT), + NES_CQP_QP_RESET = (1<<31), +}; + +enum nes_cqp_qp_wqe_word_idx { + NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6, + NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7, + NES_CQP_QP_WQE_NEW_MSS_IDX = 15, +}; + +enum nes_nic_ctx_bits { + NES_NIC_CTX_RQ_SIZE_32 = (3<<8), + NES_NIC_CTX_RQ_SIZE_512 = (3<<8), + NES_NIC_CTX_SQ_SIZE_32 = (1<<10), + NES_NIC_CTX_SQ_SIZE_512 = (3<<10), +}; + +enum nes_nic_qp_ctx_word_idx { + NES_NIC_CTX_MISC_IDX = 0, + NES_NIC_CTX_SQ_LOW_IDX = 2, + NES_NIC_CTX_SQ_HIGH_IDX = 3, + NES_NIC_CTX_RQ_LOW_IDX = 4, + NES_NIC_CTX_RQ_HIGH_IDX = 5, +}; + +enum nes_cqp_cq_bits { + NES_CQP_CQ_CEQE_MASK = (1<<9), + NES_CQP_CQ_CEQ_VALID = (1<<10), + NES_CQP_CQ_RESIZE = (1<<11), + NES_CQP_CQ_CHK_OVERFLOW = (1<<12), + NES_CQP_CQ_4KB_CHUNK = (1<<14), + NES_CQP_CQ_VIRT = (1<<15), +}; + +enum nes_cqp_stag_bits { + NES_CQP_STAG_VA_TO = (1<<9), + NES_CQP_STAG_DEALLOC_PBLS = (1<<10), + NES_CQP_STAG_PBL_BLK_SIZE = (1<<11), + NES_CQP_STAG_MR = (1<<13), + NES_CQP_STAG_RIGHTS_LOCAL_READ = (1<<16), + NES_CQP_STAG_RIGHTS_LOCAL_WRITE = (1<<17), + NES_CQP_STAG_RIGHTS_REMOTE_READ = (1<<18), + NES_CQP_STAG_RIGHTS_REMOTE_WRITE = (1<<19), + NES_CQP_STAG_RIGHTS_WINDOW_BIND = (1<<20), + NES_CQP_STAG_REM_ACC_EN = (1<<21), + NES_CQP_STAG_LEAVE_PENDING = (1<<31), +}; + +enum nes_cqp_ceq_wqeword_idx { + NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX = 1, + NES_CQP_CEQ_WQE_PBL_LOW_IDX = 6, + NES_CQP_CEQ_WQE_PBL_HIGH_IDX = 7, +}; + +enum nes_cqp_ceq_bits { + NES_CQP_CEQ_4KB_CHUNK = (1<<14), + NES_CQP_CEQ_VIRT = (1<<15), +}; + +enum nes_cqp_aeq_wqeword_idx { + NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX = 1, + NES_CQP_AEQ_WQE_PBL_LOW_IDX = 6, + NES_CQP_AEQ_WQE_PBL_HIGH_IDX = 7, +}; + +enum nes_cqp_aeq_bits { + NES_CQP_AEQ_4KB_CHUNK = (1<<14), + NES_CQP_AEQ_VIRT = (1<<15), +}; + +enum nes_cqp_lmi_wqeword_idx { + NES_CQP_LMI_WQE_LMI_OFFSET_IDX = 1, + NES_CQP_LMI_WQE_FRAG_LOW_IDX = 8, + NES_CQP_LMI_WQE_FRAG_HIGH_IDX = 9, + NES_CQP_LMI_WQE_FRAG_LEN_IDX = 10, +}; + +enum nes_cqp_arp_wqeword_idx { + NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX = 6, + NES_CQP_ARP_WQE_MAC_HIGH_IDX = 7, + NES_CQP_ARP_WQE_REACHABILITY_MAX_IDX = 1, +}; + +enum nes_cqp_upload_wqeword_idx { + NES_CQP_UPLOAD_WQE_CTXT_LOW_IDX = 6, + NES_CQP_UPLOAD_WQE_CTXT_HIGH_IDX = 7, + NES_CQP_UPLOAD_WQE_HTE_IDX = 8, +}; + +enum nes_cqp_arp_bits { + NES_CQP_ARP_VALID = (1<<8), + NES_CQP_ARP_PERM = (1<<9), +}; + +enum nes_cqp_flush_bits { + NES_CQP_FLUSH_SQ = (1<<30), + NES_CQP_FLUSH_RQ = (1<<31), +}; + +enum nes_cqe_opcode_bits { + NES_CQE_STAG_VALID = (1<<6), + NES_CQE_ERROR = (1<<7), + NES_CQE_SQ = (1<<8), + NES_CQE_SE = (1<<9), + NES_CQE_PSH = (1<<29), + NES_CQE_FIN = (1<<30), + NES_CQE_VALID = (1<<31), +}; + + +enum nes_cqe_word_idx { + NES_CQE_PAYLOAD_LENGTH_IDX = 0, + NES_CQE_COMP_COMP_CTX_LOW_IDX = 2, + NES_CQE_COMP_COMP_CTX_HIGH_IDX = 3, + NES_CQE_INV_STAG_IDX = 4, + NES_CQE_QP_ID_IDX = 5, + NES_CQE_ERROR_CODE_IDX = 6, + NES_CQE_OPCODE_IDX = 7, +}; + +enum nes_ceqe_word_idx { + NES_CEQE_CQ_CTX_LOW_IDX = 0, + NES_CEQE_CQ_CTX_HIGH_IDX = 1, +}; + +enum nes_ceqe_status_bit { + NES_CEQE_VALID = (1<<31), +}; + +enum nes_int_bits { + NES_INT_CEQ0 = (1<<0), + NES_INT_CEQ1 = (1<<1), + NES_INT_CEQ2 = (1<<2), + NES_INT_CEQ3 = (1<<3), + NES_INT_CEQ4 = (1<<4), + NES_INT_CEQ5 = (1<<5), + NES_INT_CEQ6 = (1<<6), + NES_INT_CEQ7 = (1<<7), + NES_INT_CEQ8 = (1<<8), + NES_INT_CEQ9 = (1<<9), + NES_INT_CEQ10 = (1<<10), + NES_INT_CEQ11 = (1<<11), + NES_INT_CEQ12 = (1<<12), + NES_INT_CEQ13 = (1<<13), + NES_INT_CEQ14 = (1<<14), + NES_INT_CEQ15 = (1<<15), + NES_INT_AEQ0 = (1<<16), + NES_INT_AEQ1 = (1<<17), + NES_INT_AEQ2 = (1<<18), + NES_INT_AEQ3 = (1<<19), + NES_INT_AEQ4 = (1<<20), + NES_INT_AEQ5 = (1<<21), + NES_INT_AEQ6 = (1<<22), + NES_INT_AEQ7 = (1<<23), + NES_INT_MAC0 = (1<<24), + NES_INT_MAC1 = (1<<25), + NES_INT_MAC2 = (1<<26), + NES_INT_MAC3 = (1<<27), + NES_INT_TSW = (1<<28), + NES_INT_TIMER = (1<<29), + NES_INT_INTF = (1<<30), +}; + +enum nes_intf_int_bits { + NES_INTF_INT_PCIERR = (1<<0), + NES_INTF_PERIODIC_TIMER = (1<<2), + NES_INTF_ONE_SHOT_TIMER = (1<<3), + NES_INTF_INT_CRITERR = (1<<14), + NES_INTF_INT_AEQ0_OFLOW = (1<<16), + NES_INTF_INT_AEQ1_OFLOW = (1<<17), + NES_INTF_INT_AEQ2_OFLOW = (1<<18), + NES_INTF_INT_AEQ3_OFLOW = (1<<19), + NES_INTF_INT_AEQ4_OFLOW = (1<<20), + NES_INTF_INT_AEQ5_OFLOW = (1<<21), + NES_INTF_INT_AEQ6_OFLOW = (1<<22), + NES_INTF_INT_AEQ7_OFLOW = (1<<23), + NES_INTF_INT_AEQ_OFLOW = (0xff<<16), +}; + +enum nes_mac_int_bits { + NES_MAC_INT_LINK_STAT_CHG = (1<<1), + NES_MAC_INT_XGMII_EXT = (1<<2), + NES_MAC_INT_TX_UNDERFLOW = (1<<6), + NES_MAC_INT_TX_ERROR = (1<<7), +}; + +enum nes_cqe_allocate_bits { + NES_CQE_ALLOC_INC_SELECT = (1<<28), + NES_CQE_ALLOC_NOTIFY_NEXT = (1<<29), + NES_CQE_ALLOC_NOTIFY_SE = (1<<30), + NES_CQE_ALLOC_RESET = (1<<31), +}; + +enum nes_nic_rq_wqe_word_idx { + NES_NIC_RQ_WQE_LENGTH_1_0_IDX = 0, + NES_NIC_RQ_WQE_LENGTH_3_2_IDX = 1, + NES_NIC_RQ_WQE_FRAG0_LOW_IDX = 2, + NES_NIC_RQ_WQE_FRAG0_HIGH_IDX = 3, + NES_NIC_RQ_WQE_FRAG1_LOW_IDX = 4, + NES_NIC_RQ_WQE_FRAG1_HIGH_IDX = 5, + NES_NIC_RQ_WQE_FRAG2_LOW_IDX = 6, + NES_NIC_RQ_WQE_FRAG2_HIGH_IDX = 7, + NES_NIC_RQ_WQE_FRAG3_LOW_IDX = 8, + NES_NIC_RQ_WQE_FRAG3_HIGH_IDX = 9, +}; + +enum nes_nic_sq_wqe_word_idx { + NES_NIC_SQ_WQE_MISC_IDX = 0, + NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX = 1, + NES_NIC_SQ_WQE_LSO_INFO_IDX = 2, + NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX = 3, + NES_NIC_SQ_WQE_LENGTH_2_1_IDX = 4, + NES_NIC_SQ_WQE_LENGTH_4_3_IDX = 5, + NES_NIC_SQ_WQE_FRAG0_LOW_IDX = 6, + NES_NIC_SQ_WQE_FRAG0_HIGH_IDX = 7, + NES_NIC_SQ_WQE_FRAG1_LOW_IDX = 8, + NES_NIC_SQ_WQE_FRAG1_HIGH_IDX = 9, + NES_NIC_SQ_WQE_FRAG2_LOW_IDX = 10, + NES_NIC_SQ_WQE_FRAG2_HIGH_IDX = 11, + NES_NIC_SQ_WQE_FRAG3_LOW_IDX = 12, + NES_NIC_SQ_WQE_FRAG3_HIGH_IDX = 13, + NES_NIC_SQ_WQE_FRAG4_LOW_IDX = 14, + NES_NIC_SQ_WQE_FRAG4_HIGH_IDX = 15, +}; + +enum nes_iwarp_sq_wqe_word_idx { + NES_IWARP_SQ_WQE_MISC_IDX = 0, + NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX = 1, + NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX = 2, + NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX = 3, + NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX = 4, + NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX = 5, + NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX = 7, + NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX = 8, + NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX = 9, + NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX = 10, + NES_IWARP_SQ_WQE_RDMA_STAG_IDX = 11, + NES_IWARP_SQ_WQE_IMM_DATA_START_IDX = 12, + NES_IWARP_SQ_WQE_FRAG0_LOW_IDX = 16, + NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX = 17, + NES_IWARP_SQ_WQE_LENGTH0_IDX = 18, + NES_IWARP_SQ_WQE_STAG0_IDX = 19, + NES_IWARP_SQ_WQE_FRAG1_LOW_IDX = 20, + NES_IWARP_SQ_WQE_FRAG1_HIGH_IDX = 21, + NES_IWARP_SQ_WQE_LENGTH1_IDX = 22, + NES_IWARP_SQ_WQE_STAG1_IDX = 23, + NES_IWARP_SQ_WQE_FRAG2_LOW_IDX = 24, + NES_IWARP_SQ_WQE_FRAG2_HIGH_IDX = 25, + NES_IWARP_SQ_WQE_LENGTH2_IDX = 26, + NES_IWARP_SQ_WQE_STAG2_IDX = 27, + NES_IWARP_SQ_WQE_FRAG3_LOW_IDX = 28, + NES_IWARP_SQ_WQE_FRAG3_HIGH_IDX = 29, + NES_IWARP_SQ_WQE_LENGTH3_IDX = 30, + NES_IWARP_SQ_WQE_STAG3_IDX = 31, +}; + +enum nes_iwarp_sq_bind_wqe_word_idx { + NES_IWARP_SQ_BIND_WQE_MR_IDX = 6, + NES_IWARP_SQ_BIND_WQE_MW_IDX = 7, + NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX = 8, + NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX = 9, + NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX = 10, + NES_IWARP_SQ_BIND_WQE_VA_FBO_HIGH_IDX = 11, +}; + +enum nes_iwarp_sq_fmr_wqe_word_idx { + NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX = 7, + NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX = 8, + NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX = 9, + NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX = 10, + NES_IWARP_SQ_FMR_WQE_VA_FBO_HIGH_IDX = 11, + NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX = 12, + NES_IWARP_SQ_FMR_WQE_PBL_ADDR_HIGH_IDX = 13, + NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14, +}; + +enum nes_iwarp_sq_locinv_wqe_word_idx { + NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6, +}; + + +enum nes_iwarp_rq_wqe_word_idx { + NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1, + NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2, + NES_IWARP_RQ_WQE_COMP_CTX_HIGH_IDX = 3, + NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX = 4, + NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX = 5, + NES_IWARP_RQ_WQE_FRAG0_LOW_IDX = 8, + NES_IWARP_RQ_WQE_FRAG0_HIGH_IDX = 9, + NES_IWARP_RQ_WQE_LENGTH0_IDX = 10, + NES_IWARP_RQ_WQE_STAG0_IDX = 11, + NES_IWARP_RQ_WQE_FRAG1_LOW_IDX = 12, + NES_IWARP_RQ_WQE_FRAG1_HIGH_IDX = 13, + NES_IWARP_RQ_WQE_LENGTH1_IDX = 14, + NES_IWARP_RQ_WQE_STAG1_IDX = 15, + NES_IWARP_RQ_WQE_FRAG2_LOW_IDX = 16, + NES_IWARP_RQ_WQE_FRAG2_HIGH_IDX = 17, + NES_IWARP_RQ_WQE_LENGTH2_IDX = 18, + NES_IWARP_RQ_WQE_STAG2_IDX = 19, + NES_IWARP_RQ_WQE_FRAG3_LOW_IDX = 20, + NES_IWARP_RQ_WQE_FRAG3_HIGH_IDX = 21, + NES_IWARP_RQ_WQE_LENGTH3_IDX = 22, + NES_IWARP_RQ_WQE_STAG3_IDX = 23, +}; + +enum nes_nic_sq_wqe_bits { + NES_NIC_SQ_WQE_PHDR_CS_READY = (1<<21), + NES_NIC_SQ_WQE_LSO_ENABLE = (1<<22), + NES_NIC_SQ_WQE_TAGVALUE_ENABLE = (1<<23), + NES_NIC_SQ_WQE_DISABLE_CHKSUM = (1<<30), + NES_NIC_SQ_WQE_COMPLETION = (1<<31), +}; + +enum nes_nic_cqe_word_idx { + NES_NIC_CQE_ACCQP_ID_IDX = 0, + NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2, + NES_NIC_CQE_MISC_IDX = 3, +}; + +#define NES_PKT_TYPE_APBVT_BITS 0xC112 +#define NES_PKT_TYPE_APBVT_MASK 0xff3e + +#define NES_PKT_TYPE_PVALID_BITS 0x10000000 +#define NES_PKT_TYPE_PVALID_MASK 0x30000000 + +#define NES_PKT_TYPE_TCPV4_BITS 0x0110 +#define NES_PKT_TYPE_TCPV4_MASK 0x3f30 + +#define NES_PKT_TYPE_UDPV4_BITS 0x0210 +#define NES_PKT_TYPE_UDPV4_MASK 0x3f30 + +#define NES_PKT_TYPE_IPV4_BITS 0x0010 +#define NES_PKT_TYPE_IPV4_MASK 0x3f30 + +#define NES_PKT_TYPE_OTHER_BITS 0x0000 +#define NES_PKT_TYPE_OTHER_MASK 0x0030 + +#define NES_NIC_CQE_ERRV_SHIFT 16 +enum nes_nic_ev_bits { + NES_NIC_ERRV_BITS_MODE = (1<<0), + NES_NIC_ERRV_BITS_IPV4_CSUM_ERR = (1<<1), + NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR = (1<<2), + NES_NIC_ERRV_BITS_WQE_OVERRUN = (1<<3), + NES_NIC_ERRV_BITS_IPH_ERR = (1<<4), +}; + +enum nes_nic_cqe_bits { + NES_NIC_CQE_ERRV_MASK = (0xff<<NES_NIC_CQE_ERRV_SHIFT), + NES_NIC_CQE_SQ = (1<<24), + NES_NIC_CQE_ACCQP_PORT = (1<<28), + NES_NIC_CQE_ACCQP_VALID = (1<<29), + NES_NIC_CQE_TAG_VALID = (1<<30), + NES_NIC_CQE_VALID = (1<<31), +}; + +enum nes_aeqe_word_idx { + NES_AEQE_COMP_CTXT_LOW_IDX = 0, + NES_AEQE_COMP_CTXT_HIGH_IDX = 1, + NES_AEQE_COMP_QP_CQ_ID_IDX = 2, + NES_AEQE_MISC_IDX = 3, +}; + +enum nes_aeqe_bits { + NES_AEQE_QP = (1<<16), + NES_AEQE_CQ = (1<<17), + NES_AEQE_SQ = (1<<18), + NES_AEQE_INBOUND_RDMA = (1<<19), + NES_AEQE_IWARP_STATE_MASK = (7<<20), + NES_AEQE_TCP_STATE_MASK = (0xf<<24), + NES_AEQE_VALID = (1<<31), +}; + +#define NES_AEQE_IWARP_STATE_SHIFT 20 +#define NES_AEQE_TCP_STATE_SHIFT 24 + +enum nes_aeqe_iwarp_state { + NES_AEQE_IWARP_STATE_NON_EXISTANT = 0, + NES_AEQE_IWARP_STATE_IDLE = 1, + NES_AEQE_IWARP_STATE_RTS = 2, + NES_AEQE_IWARP_STATE_CLOSING = 3, + NES_AEQE_IWARP_STATE_TERMINATE = 5, + NES_AEQE_IWARP_STATE_ERROR = 6 +}; + +enum nes_aeqe_tcp_state { + NES_AEQE_TCP_STATE_NON_EXISTANT = 0, + NES_AEQE_TCP_STATE_CLOSED = 1, + NES_AEQE_TCP_STATE_LISTEN = 2, + NES_AEQE_TCP_STATE_SYN_SENT = 3, + NES_AEQE_TCP_STATE_SYN_RCVD = 4, + NES_AEQE_TCP_STATE_ESTABLISHED = 5, + NES_AEQE_TCP_STATE_CLOSE_WAIT = 6, + NES_AEQE_TCP_STATE_FIN_WAIT_1 = 7, + NES_AEQE_TCP_STATE_CLOSING = 8, + NES_AEQE_TCP_STATE_LAST_ACK = 9, + NES_AEQE_TCP_STATE_FIN_WAIT_2 = 10, + NES_AEQE_TCP_STATE_TIME_WAIT = 11 +}; + +enum nes_aeqe_aeid { + NES_AEQE_AEID_AMP_UNALLOCATED_STAG = 0x0102, + NES_AEQE_AEID_AMP_INVALID_STAG = 0x0103, + NES_AEQE_AEID_AMP_BAD_QP = 0x0104, + NES_AEQE_AEID_AMP_BAD_PD = 0x0105, + NES_AEQE_AEID_AMP_BAD_STAG_KEY = 0x0106, + NES_AEQE_AEID_AMP_BAD_STAG_INDEX = 0x0107, + NES_AEQE_AEID_AMP_BOUNDS_VIOLATION = 0x0108, + NES_AEQE_AEID_AMP_RIGHTS_VIOLATION = 0x0109, + NES_AEQE_AEID_AMP_TO_WRAP = 0x010a, + NES_AEQE_AEID_AMP_FASTREG_SHARED = 0x010b, + NES_AEQE_AEID_AMP_FASTREG_VALID_STAG = 0x010c, + NES_AEQE_AEID_AMP_FASTREG_MW_STAG = 0x010d, + NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS = 0x010e, + NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW = 0x010f, + NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH = 0x0110, + NES_AEQE_AEID_AMP_INVALIDATE_SHARED = 0x0111, + NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS = 0x0112, + NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS = 0x0113, + NES_AEQE_AEID_AMP_MWBIND_VALID_STAG = 0x0114, + NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG = 0x0115, + NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG = 0x0116, + NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG = 0x0117, + NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS = 0x0118, + NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS = 0x0119, + NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT = 0x011a, + NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED = 0x011b, + NES_AEQE_AEID_BAD_CLOSE = 0x0201, + NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE = 0x0202, + NES_AEQE_AEID_CQ_OPERATION_ERROR = 0x0203, + NES_AEQE_AEID_PRIV_OPERATION_DENIED = 0x0204, + NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO = 0x0205, + NES_AEQE_AEID_STAG_ZERO_INVALID = 0x0206, + NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN = 0x0301, + NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID = 0x0302, + NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER = 0x0303, + NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION = 0x0304, + NES_AEQE_AEID_DDP_UBE_INVALID_MO = 0x0305, + NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE = 0x0306, + NES_AEQE_AEID_DDP_UBE_INVALID_QN = 0x0307, + NES_AEQE_AEID_DDP_NO_L_BIT = 0x0308, + NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION = 0x0311, + NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE = 0x0312, + NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST = 0x0313, + NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP = 0x0314, + NES_AEQE_AEID_INVALID_ARP_ENTRY = 0x0401, + NES_AEQE_AEID_INVALID_TCP_OPTION_RCVD = 0x0402, + NES_AEQE_AEID_STALE_ARP_ENTRY = 0x0403, + NES_AEQE_AEID_LLP_CLOSE_COMPLETE = 0x0501, + NES_AEQE_AEID_LLP_CONNECTION_RESET = 0x0502, + NES_AEQE_AEID_LLP_FIN_RECEIVED = 0x0503, + NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH = 0x0504, + NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR = 0x0505, + NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE = 0x0506, + NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL = 0x0507, + NES_AEQE_AEID_LLP_SYN_RECEIVED = 0x0508, + NES_AEQE_AEID_LLP_TERMINATE_RECEIVED = 0x0509, + NES_AEQE_AEID_LLP_TOO_MANY_RETRIES = 0x050a, + NES_AEQE_AEID_LLP_TOO_MANY_KEEPALIVE_RETRIES = 0x050b, + NES_AEQE_AEID_RESET_SENT = 0x0601, + NES_AEQE_AEID_TERMINATE_SENT = 0x0602, + NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC = 0x0700 +}; + +enum nes_iwarp_sq_opcodes { + NES_IWARP_SQ_WQE_WRPDU = (1<<15), + NES_IWARP_SQ_WQE_PSH = (1<<21), + NES_IWARP_SQ_WQE_STREAMING = (1<<23), + NES_IWARP_SQ_WQE_IMM_DATA = (1<<28), + NES_IWARP_SQ_WQE_READ_FENCE = (1<<29), + NES_IWARP_SQ_WQE_LOCAL_FENCE = (1<<30), + NES_IWARP_SQ_WQE_SIGNALED_COMPL = (1<<31), +}; + +enum nes_iwarp_sq_wqe_bits { + NES_IWARP_SQ_OP_RDMAW = 0, + NES_IWARP_SQ_OP_RDMAR = 1, + NES_IWARP_SQ_OP_SEND = 3, + NES_IWARP_SQ_OP_SENDINV = 4, + NES_IWARP_SQ_OP_SENDSE = 5, + NES_IWARP_SQ_OP_SENDSEINV = 6, + NES_IWARP_SQ_OP_BIND = 8, + NES_IWARP_SQ_OP_FAST_REG = 9, + NES_IWARP_SQ_OP_LOCINV = 10, + NES_IWARP_SQ_OP_RDMAR_LOCINV = 11, + NES_IWARP_SQ_OP_NOP = 12, +}; + +#define NES_EEPROM_READ_REQUEST (1<<16) +#define NES_MAC_ADDR_VALID (1<<20) + +/* + * NES index registers init values. + */ +struct nes_init_values { + u32 index; + u32 data; + u8 wrt; +}; + +/* + * NES registers in BAR0. + */ +struct nes_pci_regs { + u32 int_status; + u32 int_mask; + u32 int_pending; + u32 intf_int_status; + u32 intf_int_mask; + u32 other_regs[59]; /* pad out to 256 bytes for now */ +}; + +#define NES_CQP_SQ_SIZE 128 +#define NES_CCQ_SIZE 128 +#define NES_NIC_WQ_SIZE 512 +#define NES_NIC_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_512) | (NES_NIC_CTX_SQ_SIZE_512)) +#define NES_NIC_BACK_STORE 0x00038000 + +struct nes_device; + +struct nes_hw_nic_qp_context { + __le32 context_words[6]; +}; + +struct nes_hw_nic_sq_wqe { + __le32 wqe_words[16]; +}; + +struct nes_hw_nic_rq_wqe { + __le32 wqe_words[16]; +}; + +struct nes_hw_nic_cqe { + __le32 cqe_words[4]; +}; + +struct nes_hw_cqp_qp_context { + __le32 context_words[4]; +}; + +struct nes_hw_cqp_wqe { + __le32 wqe_words[16]; +}; + +struct nes_hw_qp_wqe { + __le32 wqe_words[32]; +}; + +struct nes_hw_cqe { + __le32 cqe_words[8]; +}; + +struct nes_hw_ceqe { + __le32 ceqe_words[2]; +}; + +struct nes_hw_aeqe { + __le32 aeqe_words[4]; +}; + +struct nes_cqp_request { + union { + u64 cqp_callback_context; + void *cqp_callback_pointer; + }; + wait_queue_head_t waitq; + struct nes_hw_cqp_wqe cqp_wqe; + struct list_head list; + atomic_t refcount; + void (*cqp_callback)(struct nes_device *nesdev, struct nes_cqp_request *cqp_request); + u16 major_code; + u16 minor_code; + u8 waiting; + u8 request_done; + u8 dynamic; + u8 callback; +}; + +struct nes_hw_cqp { + struct nes_hw_cqp_wqe *sq_vbase; + dma_addr_t sq_pbase; + spinlock_t lock; + wait_queue_head_t waitq; + u16 qp_id; + u16 sq_head; + u16 sq_tail; + u16 sq_size; +}; + +#define NES_FIRST_FRAG_SIZE 128 +struct nes_first_frag { + u8 buffer[NES_FIRST_FRAG_SIZE]; +}; + +struct nes_hw_nic { + struct nes_first_frag *first_frag_vbase; /* virtual address of first frags */ + struct nes_hw_nic_sq_wqe *sq_vbase; /* virtual address of sq */ + struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */ + struct sk_buff *tx_skb[NES_NIC_WQ_SIZE]; + struct sk_buff *rx_skb[NES_NIC_WQ_SIZE]; + dma_addr_t frag_paddr[NES_NIC_WQ_SIZE]; + unsigned long first_frag_overflow[BITS_TO_LONGS(NES_NIC_WQ_SIZE)]; + dma_addr_t sq_pbase; /* PCI memory for host rings */ + dma_addr_t rq_pbase; /* PCI memory for host rings */ + + u16 qp_id; + u16 sq_head; + u16 sq_tail; + u16 sq_size; + u16 rq_head; + u16 rq_tail; + u16 rq_size; + u8 replenishing_rq; + u8 reserved; + + spinlock_t sq_lock; + spinlock_t rq_lock; +}; + +struct nes_hw_nic_cq { + struct nes_hw_nic_cqe volatile *cq_vbase; /* PCI memory for host rings */ + void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_nic_cq *cq); + dma_addr_t cq_pbase; /* PCI memory for host rings */ + int rx_cqes_completed; + int cqe_allocs_pending; + int rx_pkts_indicated; + u16 cq_head; + u16 cq_size; + u16 cq_number; + u8 cqes_pending; +}; + +struct nes_hw_qp { + struct nes_hw_qp_wqe *sq_vbase; /* PCI memory for host rings */ + struct nes_hw_qp_wqe *rq_vbase; /* PCI memory for host rings */ + void *q2_vbase; /* PCI memory for host rings */ + dma_addr_t sq_pbase; /* PCI memory for host rings */ + dma_addr_t rq_pbase; /* PCI memory for host rings */ + dma_addr_t q2_pbase; /* PCI memory for host rings */ + u32 qp_id; + u16 sq_head; + u16 sq_tail; + u16 sq_size; + u16 rq_head; + u16 rq_tail; + u16 rq_size; + u8 rq_encoded_size; + u8 sq_encoded_size; +}; + +struct nes_hw_cq { + struct nes_hw_cqe volatile *cq_vbase; /* PCI memory for host rings */ + void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_cq *cq); + dma_addr_t cq_pbase; /* PCI memory for host rings */ + u16 cq_head; + u16 cq_size; + u16 cq_number; +}; + +struct nes_hw_ceq { + struct nes_hw_ceqe volatile *ceq_vbase; /* PCI memory for host rings */ + dma_addr_t ceq_pbase; /* PCI memory for host rings */ + u16 ceq_head; + u16 ceq_size; +}; + +struct nes_hw_aeq { + struct nes_hw_aeqe volatile *aeq_vbase; /* PCI memory for host rings */ + dma_addr_t aeq_pbase; /* PCI memory for host rings */ + u16 aeq_head; + u16 aeq_size; +}; + +struct nic_qp_map { + u8 qpid; + u8 nic_index; + u8 logical_port; + u8 is_hnic; +}; + +#define NES_CQP_ARP_AEQ_INDEX_MASK 0x000f0000 +#define NES_CQP_ARP_AEQ_INDEX_SHIFT 16 + +#define NES_CQP_APBVT_ADD 0x00008000 +#define NES_CQP_APBVT_NIC_SHIFT 16 + +#define NES_ARP_ADD 1 +#define NES_ARP_DELETE 2 +#define NES_ARP_RESOLVE 3 + +#define NES_MAC_SW_IDLE 0 +#define NES_MAC_SW_INTERRUPT 1 +#define NES_MAC_SW_MH 2 + +struct nes_arp_entry { + u32 ip_addr; + u8 mac_addr[ETH_ALEN]; +}; + +#define NES_NIC_FAST_TIMER 96 +#define NES_NIC_FAST_TIMER_LOW 40 +#define NES_NIC_FAST_TIMER_HIGH 1000 +#define DEFAULT_NES_QL_HIGH 256 +#define DEFAULT_NES_QL_LOW 16 +#define DEFAULT_NES_QL_TARGET 64 +#define DEFAULT_JUMBO_NES_QL_LOW 12 +#define DEFAULT_JUMBO_NES_QL_TARGET 40 +#define DEFAULT_JUMBO_NES_QL_HIGH 128 +#define NES_NIC_CQ_DOWNWARD_TREND 8 + +struct nes_hw_tune_timer { + //u16 cq_count; + u16 threshold_low; + u16 threshold_target; + u16 threshold_high; + u16 timer_in_use; + u16 timer_in_use_old; + u16 timer_in_use_min; + u16 timer_in_use_max; + u8 timer_direction_upward; + u8 timer_direction_downward; + u16 cq_count_old; + u8 cq_direction_downward; +}; + +#define NES_TIMER_INT_LIMIT 2 +#define NES_TIMER_INT_LIMIT_DYNAMIC 10 +#define NES_TIMER_ENABLE_LIMIT 4 +#define NES_MAX_LINK_INTERRUPTS 128 +#define NES_MAX_LINK_CHECK 200 + +struct nes_adapter { + u64 fw_ver; + unsigned long *allocated_qps; + unsigned long *allocated_cqs; + unsigned long *allocated_mrs; + unsigned long *allocated_pds; + unsigned long *allocated_arps; + struct nes_qp **qp_table; + struct workqueue_struct *work_q; + + struct list_head list; + struct list_head active_listeners; + /* list of the netdev's associated with each logical port */ + struct list_head nesvnic_list[4]; + + struct timer_list mh_timer; + struct timer_list lc_timer; + struct work_struct work; + spinlock_t resource_lock; + spinlock_t phy_lock; + spinlock_t pbl_lock; + spinlock_t periodic_timer_lock; + + struct nes_arp_entry arp_table[NES_MAX_ARP_TABLE_SIZE]; + + /* Adapter CEQ and AEQs */ + struct nes_hw_ceq ceq[16]; + struct nes_hw_aeq aeq[8]; + + struct nes_hw_tune_timer tune_timer; + + unsigned long doorbell_start; + + u32 hw_rev; + u32 vendor_id; + u32 vendor_part_id; + u32 device_cap_flags; + u32 tick_delta; + u32 timer_int_req; + u32 arp_table_size; + u32 next_arp_index; + + u32 max_mr; + u32 max_256pbl; + u32 max_4kpbl; + u32 free_256pbl; + u32 free_4kpbl; + u32 max_mr_size; + u32 max_qp; + u32 next_qp; + u32 max_irrq; + u32 max_qp_wr; + u32 max_sge; + u32 max_cq; + u32 next_cq; + u32 max_cqe; + u32 max_pd; + u32 base_pd; + u32 next_pd; + u32 hte_index_mask; + + /* EEPROM information */ + u32 rx_pool_size; + u32 tx_pool_size; + u32 rx_threshold; + u32 tcp_timer_core_clk_divisor; + u32 iwarp_config; + u32 cm_config; + u32 sws_timer_config; + u32 tcp_config1; + u32 wqm_wat; + u32 core_clock; + u32 firmware_version; + + u32 nic_rx_eth_route_err; + + u32 et_rx_coalesce_usecs; + u32 et_rx_max_coalesced_frames; + u32 et_rx_coalesce_usecs_irq; + u32 et_rx_max_coalesced_frames_irq; + u32 et_pkt_rate_low; + u32 et_rx_coalesce_usecs_low; + u32 et_rx_max_coalesced_frames_low; + u32 et_pkt_rate_high; + u32 et_rx_coalesce_usecs_high; + u32 et_rx_max_coalesced_frames_high; + u32 et_rate_sample_interval; + u32 timer_int_limit; + + /* Adapter base MAC address */ + u32 mac_addr_low; + u16 mac_addr_high; + + u16 firmware_eeprom_offset; + u16 software_eeprom_offset; + + u16 max_irrq_wr; + + /* pd config for each port */ + u16 pd_config_size[4]; + u16 pd_config_base[4]; + + u16 link_interrupt_count[4]; + + /* the phy index for each port */ + u8 phy_index[4]; + u8 mac_sw_state[4]; + u8 mac_link_down[4]; + u8 phy_type[4]; + + /* PCI information */ + unsigned int devfn; + unsigned char bus_number; + unsigned char OneG_Mode; + + unsigned char ref_count; + u8 netdev_count; + u8 netdev_max; /* from host nic address count in EEPROM */ + u8 port_count; + u8 virtwq; + u8 et_use_adaptive_rx_coalesce; + u8 adapter_fcn_count; +}; + +struct nes_pbl { + u64 *pbl_vbase; + dma_addr_t pbl_pbase; + struct page *page; + unsigned long user_base; + u32 pbl_size; + struct list_head list; + /* TODO: need to add list for two level tables */ +}; + +struct nes_listener { + struct work_struct work; + struct workqueue_struct *wq; + struct nes_vnic *nesvnic; + struct iw_cm_id *cm_id; + struct list_head list; + unsigned long socket; + u8 accept_failed; +}; + +struct nes_ib_device; + +struct nes_vnic { + struct nes_ib_device *nesibdev; + u64 sq_full; + u64 sq_locked; + u64 tso_requests; + u64 segmented_tso_requests; + u64 linearized_skbs; + u64 tx_sw_dropped; + u64 endnode_nstat_rx_discard; + u64 endnode_nstat_rx_octets; + u64 endnode_nstat_rx_frames; + u64 endnode_nstat_tx_octets; + u64 endnode_nstat_tx_frames; + u64 endnode_ipv4_tcp_retransmits; + /* void *mem; */ + struct nes_device *nesdev; + struct net_device *netdev; + struct vlan_group *vlan_grp; + atomic_t rx_skbs_needed; + atomic_t rx_skb_timer_running; + int budget; + u32 msg_enable; + /* u32 tx_avail; */ + __be32 local_ipaddr; + struct napi_struct napi; + spinlock_t tx_lock; /* could use netdev tx lock? */ + struct timer_list rq_wqes_timer; + u32 nic_mem_size; + void *nic_vbase; + dma_addr_t nic_pbase; + struct nes_hw_nic nic; + struct nes_hw_nic_cq nic_cq; + u32 mcrq_qp_id; + struct nes_ucontext *mcrq_ucontext; + struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev); + void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *, int); + int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr ); + struct net_device_stats netstats; + /* used to put the netdev on the adapters logical port list */ + struct list_head list; + u16 max_frame_size; + u8 netdev_open; + u8 linkup; + u8 logical_port; + u8 netdev_index; /* might not be needed, indexes nesdev->netdev */ + u8 perfect_filter_index; + u8 nic_index; + u8 qp_nic_index[4]; + u8 next_qp_nic_index; + u8 of_device_registered; + u8 rdma_enabled; + u8 rx_checksum_disabled; +}; + +struct nes_ib_device { + struct ib_device ibdev; + struct nes_vnic *nesvnic; + + /* Virtual RNIC Limits */ + u32 max_mr; + u32 max_qp; + u32 max_cq; + u32 max_pd; + u32 num_mr; + u32 num_qp; + u32 num_cq; + u32 num_pd; +}; + +#define nes_vlan_rx vlan_hwaccel_receive_skb +#define nes_netif_rx netif_receive_skb + +#endif /* __NES_HW_H */ diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c new file mode 100644 index 000000000000..b6cc265aa9a4 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -0,0 +1,1703 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/if_arp.h> +#include <linux/if_vlan.h> +#include <linux/ethtool.h> +#include <net/tcp.h> + +#include <net/inet_common.h> +#include <linux/inet.h> + +#include "nes.h" + +static struct nic_qp_map nic_qp_mapping_0[] = { + {16,0,0,1},{24,4,0,0},{28,8,0,0},{32,12,0,0}, + {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0}, + {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0}, + {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0} +}; + +static struct nic_qp_map nic_qp_mapping_1[] = { + {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0}, + {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0} +}; + +static struct nic_qp_map nic_qp_mapping_2[] = { + {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0} +}; + +static struct nic_qp_map nic_qp_mapping_3[] = { + {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0} +}; + +static struct nic_qp_map nic_qp_mapping_4[] = { + {28,8,0,0},{32,12,0,0} +}; + +static struct nic_qp_map nic_qp_mapping_5[] = { + {29,9,1,0},{33,13,1,0} +}; + +static struct nic_qp_map nic_qp_mapping_6[] = { + {30,10,2,0},{34,14,2,0} +}; + +static struct nic_qp_map nic_qp_mapping_7[] = { + {31,11,3,0},{35,15,3,0} +}; + +static struct nic_qp_map *nic_qp_mapping_per_function[] = { + nic_qp_mapping_0, nic_qp_mapping_1, nic_qp_mapping_2, nic_qp_mapping_3, + nic_qp_mapping_4, nic_qp_mapping_5, nic_qp_mapping_6, nic_qp_mapping_7 +}; + +static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK + | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN; +static int debug = -1; + + +static int nes_netdev_open(struct net_device *); +static int nes_netdev_stop(struct net_device *); +static int nes_netdev_start_xmit(struct sk_buff *, struct net_device *); +static struct net_device_stats *nes_netdev_get_stats(struct net_device *); +static void nes_netdev_tx_timeout(struct net_device *); +static int nes_netdev_set_mac_address(struct net_device *, void *); +static int nes_netdev_change_mtu(struct net_device *, int); + +/** + * nes_netdev_poll + */ +static int nes_netdev_poll(struct napi_struct *napi, int budget) +{ + struct nes_vnic *nesvnic = container_of(napi, struct nes_vnic, napi); + struct net_device *netdev = nesvnic->netdev; + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_hw_nic_cq *nescq = &nesvnic->nic_cq; + + nesvnic->budget = budget; + nescq->cqes_pending = 0; + nescq->rx_cqes_completed = 0; + nescq->cqe_allocs_pending = 0; + nescq->rx_pkts_indicated = 0; + + nes_nic_ce_handler(nesdev, nescq); + + if (nescq->cqes_pending == 0) { + netif_rx_complete(netdev, napi); + /* clear out completed cqes and arm */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + nescq->cq_number | (nescq->cqe_allocs_pending << 16)); + nes_read32(nesdev->regs+NES_CQE_ALLOC); + } else { + /* clear out completed cqes but don't arm */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, + nescq->cq_number | (nescq->cqe_allocs_pending << 16)); + nes_debug(NES_DBG_NETDEV, "%s: exiting with work pending\n", + nesvnic->netdev->name); + } + return nescq->rx_pkts_indicated; +} + + +/** + * nes_netdev_open - Activate the network interface; ifconfig + * ethx up. + */ +static int nes_netdev_open(struct net_device *netdev) +{ + u32 macaddr_low; + u16 macaddr_high; + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + int ret; + int i; + struct nes_vnic *first_nesvnic; + u32 nic_active_bit; + u32 nic_active; + + assert(nesdev != NULL); + + first_nesvnic = list_entry(nesdev->nesadapter->nesvnic_list[nesdev->mac_index].next, + struct nes_vnic, list); + + if (netif_msg_ifup(nesvnic)) + printk(KERN_INFO PFX "%s: enabling interface\n", netdev->name); + + ret = nes_init_nic_qp(nesdev, netdev); + if (ret) { + return ret; + } + + netif_carrier_off(netdev); + netif_stop_queue(netdev); + + if ((!nesvnic->of_device_registered) && (nesvnic->rdma_enabled)) { + nesvnic->nesibdev = nes_init_ofa_device(netdev); + if (nesvnic->nesibdev == NULL) { + printk(KERN_ERR PFX "%s: nesvnic->nesibdev alloc failed", netdev->name); + } else { + nesvnic->nesibdev->nesvnic = nesvnic; + ret = nes_register_ofa_device(nesvnic->nesibdev); + if (ret) { + printk(KERN_ERR PFX "%s: Unable to register RDMA device, ret = %d\n", + netdev->name, ret); + } + } + } + /* Set packet filters */ + nic_active_bit = 1 << nesvnic->nic_index; + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); + + macaddr_high = ((u16)netdev->dev_addr[0]) << 8; + macaddr_high += (u16)netdev->dev_addr[1]; + macaddr_low = ((u32)netdev->dev_addr[2]) << 24; + macaddr_low += ((u32)netdev->dev_addr[3]) << 16; + macaddr_low += ((u32)netdev->dev_addr[4]) << 8; + macaddr_low += (u32)netdev->dev_addr[5]; + + /* Program the various MAC regs */ + for (i = 0; i < NES_MAX_PORT_COUNT; i++) { + if (nesvnic->qp_nic_index[i] == 0xf) { + break; + } + nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW" + " (Addr:%08X) = %08X, HIGH = %08X.\n", + i, nesvnic->qp_nic_index[i], + NES_IDX_PERFECT_FILTER_LOW+((nesvnic->perfect_filter_index + i) * 8), + macaddr_low, + (u32)macaddr_high | NES_MAC_ADDR_VALID | + ((((u32)nesvnic->nic_index) << 16))); + nes_write_indexed(nesdev, + NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8), + macaddr_low); + nes_write_indexed(nesdev, + NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8), + (u32)macaddr_high | NES_MAC_ADDR_VALID | + ((((u32)nesvnic->nic_index) << 16))); + } + + + nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + nesvnic->nic_cq.cq_number); + nes_read32(nesdev->regs+NES_CQE_ALLOC); + + if (first_nesvnic->linkup) { + /* Enable network packets */ + nesvnic->linkup = 1; + netif_start_queue(netdev); + netif_carrier_on(netdev); + } + napi_enable(&nesvnic->napi); + nesvnic->netdev_open = 1; + + return 0; +} + + +/** + * nes_netdev_stop + */ +static int nes_netdev_stop(struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + u32 nic_active_mask; + u32 nic_active; + + nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n", + nesvnic, nesdev, netdev, netdev->name); + if (nesvnic->netdev_open == 0) + return 0; + + if (netif_msg_ifdown(nesvnic)) + printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name); + + /* Disable network packets */ + napi_disable(&nesvnic->napi); + netif_stop_queue(netdev); + if ((nesdev->netdev[0] == netdev) & (nesvnic->logical_port == nesdev->mac_index)) { + nes_write_indexed(nesdev, + NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff); + } + + nic_active_mask = ~((u32)(1 << nesvnic->nic_index)); + nes_write_indexed(nesdev, NES_IDX_PERFECT_FILTER_HIGH+ + (nesvnic->perfect_filter_index*8), 0); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); + + + if (nesvnic->of_device_registered) { + nes_destroy_ofa_device(nesvnic->nesibdev); + nesvnic->nesibdev = NULL; + nesvnic->of_device_registered = 0; + } + nes_destroy_nic_qp(nesvnic); + + nesvnic->netdev_open = 0; + + return 0; +} + + +/** + * nes_nic_send + */ +static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_hw_nic *nesnic = &nesvnic->nic; + struct nes_hw_nic_sq_wqe *nic_sqe; + struct tcphdr *tcph; + __le16 *wqe_fragment_length; + u32 wqe_misc; + u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */ + u16 skb_fragment_index; + dma_addr_t bus_address; + + nic_sqe = &nesnic->sq_vbase[nesnic->sq_head]; + wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; + + /* setup the VLAN tag if present */ + if (vlan_tx_tag_present(skb)) { + nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n", + netdev->name, vlan_tx_tag_get(skb)); + wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE; + wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb); + } else + wqe_misc = 0; + + /* bump past the vlan tag */ + wqe_fragment_length++; + /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */ + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + tcph = tcp_hdr(skb); + if (1) { + if (skb_is_gso(skb)) { + /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... seg size = %u\n", + netdev->name, skb_is_gso(skb)); */ + wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | + NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb); + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX, + ((u32)tcph->doff) | + (((u32)(((unsigned char *)tcph) - skb->data)) << 4)); + } else { + wqe_misc |= NES_NIC_SQ_WQE_COMPLETION; + } + } + } else { /* CHECKSUM_HW */ + wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM | NES_NIC_SQ_WQE_COMPLETION; + } + + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX, + skb->len); + memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer, + skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), skb_headlen(skb))); + wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE), + skb_headlen(skb))); + wqe_fragment_length[1] = 0; + if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) { + if ((skb_shinfo(skb)->nr_frags + 1) > 4) { + nes_debug(NES_DBG_NIC_TX, "%s: Packet with %u fragments not sent, skb_headlen=%u\n", + netdev->name, skb_shinfo(skb)->nr_frags + 2, skb_headlen(skb)); + kfree_skb(skb); + nesvnic->tx_sw_dropped++; + return NETDEV_TX_LOCKED; + } + set_bit(nesnic->sq_head, nesnic->first_frag_overflow); + bus_address = pci_map_single(nesdev->pcidev, skb->data + NES_FIRST_FRAG_SIZE, + skb_headlen(skb) - NES_FIRST_FRAG_SIZE, PCI_DMA_TODEVICE); + wqe_fragment_length[wqe_fragment_index++] = + cpu_to_le16(skb_headlen(skb) - NES_FIRST_FRAG_SIZE); + wqe_fragment_length[wqe_fragment_index] = 0; + set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, + ((u64)(bus_address))); + nesnic->tx_skb[nesnic->sq_head] = skb; + } + + if (skb_headlen(skb) == skb->len) { + if (skb_headlen(skb) <= NES_FIRST_FRAG_SIZE) { + nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_2_1_IDX] = 0; + nesnic->tx_skb[nesnic->sq_head] = NULL; + dev_kfree_skb(skb); + } + } else { + /* Deal with Fragments */ + nesnic->tx_skb[nesnic->sq_head] = skb; + for (skb_fragment_index = 0; skb_fragment_index < skb_shinfo(skb)->nr_frags; + skb_fragment_index++) { + bus_address = pci_map_page( nesdev->pcidev, + skb_shinfo(skb)->frags[skb_fragment_index].page, + skb_shinfo(skb)->frags[skb_fragment_index].page_offset, + skb_shinfo(skb)->frags[skb_fragment_index].size, + PCI_DMA_TODEVICE); + wqe_fragment_length[wqe_fragment_index] = + cpu_to_le16(skb_shinfo(skb)->frags[skb_fragment_index].size); + set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), + bus_address); + wqe_fragment_index++; + if (wqe_fragment_index < 5) + wqe_fragment_length[wqe_fragment_index] = 0; + } + } + + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX, wqe_misc); + nesnic->sq_head++; + nesnic->sq_head &= nesnic->sq_size - 1; + + return NETDEV_TX_OK; +} + + +/** + * nes_netdev_start_xmit + */ +static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_hw_nic *nesnic = &nesvnic->nic; + struct nes_hw_nic_sq_wqe *nic_sqe; + struct tcphdr *tcph; + /* struct udphdr *udph; */ +#define NES_MAX_TSO_FRAGS 18 + /* 64K segment plus overflow on each side */ + dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS]; + dma_addr_t bus_address; + u32 tso_frag_index; + u32 tso_frag_count; + u32 tso_wqe_length; + u32 curr_tcp_seq; + u32 wqe_count=1; + u32 send_rc; + struct iphdr *iph; + unsigned long flags; + __le16 *wqe_fragment_length; + u32 nr_frags; + u32 original_first_length; +// u64 *wqe_fragment_address; + /* first fragment (0) is used by copy buffer */ + u16 wqe_fragment_index=1; + u16 hoffset; + u16 nhoffset; + u16 wqes_needed; + u16 wqes_available; + u32 old_head; + u32 wqe_misc; + + /* nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," + " (%u frags), tso_size=%u\n", + netdev->name, skb->len, skb_headlen(skb), + skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); + */ + + if (!netif_carrier_ok(netdev)) + return NETDEV_TX_OK; + + if (netif_queue_stopped(netdev)) + return NETDEV_TX_BUSY; + + local_irq_save(flags); + if (!spin_trylock(&nesnic->sq_lock)) { + local_irq_restore(flags); + nesvnic->sq_locked++; + return NETDEV_TX_LOCKED; + } + + /* Check if SQ is full */ + if ((((nesnic->sq_tail+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) == 1) { + if (!netif_queue_stopped(netdev)) { + netif_stop_queue(netdev); + barrier(); + if ((((((volatile u16)nesnic->sq_tail)+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) != 1) { + netif_start_queue(netdev); + goto sq_no_longer_full; + } + } + nesvnic->sq_full++; + spin_unlock_irqrestore(&nesnic->sq_lock, flags); + return NETDEV_TX_BUSY; + } + +sq_no_longer_full: + nr_frags = skb_shinfo(skb)->nr_frags; + if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) { + nr_frags++; + } + /* Check if too many fragments */ + if (unlikely((nr_frags > 4))) { + if (skb_is_gso(skb)) { + nesvnic->segmented_tso_requests++; + nesvnic->tso_requests++; + old_head = nesnic->sq_head; + /* Basically 4 fragments available per WQE with extended fragments */ + wqes_needed = nr_frags >> 2; + wqes_needed += (nr_frags&3)?1:0; + wqes_available = (((nesnic->sq_tail+nesnic->sq_size)-nesnic->sq_head) - 1) & + (nesnic->sq_size - 1); + + if (unlikely(wqes_needed > wqes_available)) { + if (!netif_queue_stopped(netdev)) { + netif_stop_queue(netdev); + barrier(); + wqes_available = (((((volatile u16)nesnic->sq_tail)+nesnic->sq_size)-nesnic->sq_head) - 1) & + (nesnic->sq_size - 1); + if (wqes_needed <= wqes_available) { + netif_start_queue(netdev); + goto tso_sq_no_longer_full; + } + } + nesvnic->sq_full++; + spin_unlock_irqrestore(&nesnic->sq_lock, flags); + nes_debug(NES_DBG_NIC_TX, "%s: HNIC SQ full- TSO request has too many frags!\n", + netdev->name); + return NETDEV_TX_BUSY; + } +tso_sq_no_longer_full: + /* Map all the buffers */ + for (tso_frag_count=0; tso_frag_count < skb_shinfo(skb)->nr_frags; + tso_frag_count++) { + tso_bus_address[tso_frag_count] = pci_map_page( nesdev->pcidev, + skb_shinfo(skb)->frags[tso_frag_count].page, + skb_shinfo(skb)->frags[tso_frag_count].page_offset, + skb_shinfo(skb)->frags[tso_frag_count].size, + PCI_DMA_TODEVICE); + } + + tso_frag_index = 0; + curr_tcp_seq = ntohl(tcp_hdr(skb)->seq); + hoffset = skb_transport_header(skb) - skb->data; + nhoffset = skb_network_header(skb) - skb->data; + original_first_length = hoffset + ((((struct tcphdr *)skb_transport_header(skb))->doff)<<2); + + for (wqe_count=0; wqe_count<((u32)wqes_needed); wqe_count++) { + tso_wqe_length = 0; + nic_sqe = &nesnic->sq_vbase[nesnic->sq_head]; + wqe_fragment_length = + (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; + /* setup the VLAN tag if present */ + if (vlan_tx_tag_present(skb)) { + nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n", + netdev->name, vlan_tx_tag_get(skb) ); + wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE; + wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb); + } else + wqe_misc = 0; + + /* bump past the vlan tag */ + wqe_fragment_length++; + + /* Assumes header totally fits in allocated buffer and is in first fragment */ + if (original_first_length > NES_FIRST_FRAG_SIZE) { + nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n", + original_first_length, NES_FIRST_FRAG_SIZE); + nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," + " (%u frags), tso_size=%u\n", + netdev->name, + skb->len, skb_headlen(skb), + skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); + } + memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer, + skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), + original_first_length)); + iph = (struct iphdr *) + (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[nhoffset]); + tcph = (struct tcphdr *) + (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[hoffset]); + if ((wqe_count+1)!=(u32)wqes_needed) { + tcph->fin = 0; + tcph->psh = 0; + tcph->rst = 0; + tcph->urg = 0; + } + if (wqe_count) { + tcph->syn = 0; + } + tcph->seq = htonl(curr_tcp_seq); + wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE), + original_first_length)); + + wqe_fragment_index = 1; + if ((wqe_count==0) && (skb_headlen(skb) > original_first_length)) { + set_bit(nesnic->sq_head, nesnic->first_frag_overflow); + bus_address = pci_map_single(nesdev->pcidev, skb->data + original_first_length, + skb_headlen(skb) - original_first_length, PCI_DMA_TODEVICE); + wqe_fragment_length[wqe_fragment_index++] = + cpu_to_le16(skb_headlen(skb) - original_first_length); + wqe_fragment_length[wqe_fragment_index] = 0; + set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, + bus_address); + } + while (wqe_fragment_index < 5) { + wqe_fragment_length[wqe_fragment_index] = + cpu_to_le16(skb_shinfo(skb)->frags[tso_frag_index].size); + set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), + (u64)tso_bus_address[tso_frag_index]); + wqe_fragment_index++; + tso_wqe_length += skb_shinfo(skb)->frags[tso_frag_index++].size; + if (wqe_fragment_index < 5) + wqe_fragment_length[wqe_fragment_index] = 0; + if (tso_frag_index == tso_frag_count) + break; + } + if ((wqe_count+1) == (u32)wqes_needed) { + nesnic->tx_skb[nesnic->sq_head] = skb; + } else { + nesnic->tx_skb[nesnic->sq_head] = NULL; + } + wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb); + if ((tso_wqe_length + original_first_length) > skb_is_gso(skb)) { + wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE; + } else { + iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset); + } + + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX, + wqe_misc); + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX, + ((u32)tcph->doff) | (((u32)hoffset) << 4)); + + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX, + tso_wqe_length + original_first_length); + curr_tcp_seq += tso_wqe_length; + nesnic->sq_head++; + nesnic->sq_head &= nesnic->sq_size-1; + } + } else { + nesvnic->linearized_skbs++; + hoffset = skb_transport_header(skb) - skb->data; + nhoffset = skb_network_header(skb) - skb->data; + skb_linearize(skb); + skb_set_transport_header(skb, hoffset); + skb_set_network_header(skb, nhoffset); + send_rc = nes_nic_send(skb, netdev); + if (send_rc != NETDEV_TX_OK) { + spin_unlock_irqrestore(&nesnic->sq_lock, flags); + return NETDEV_TX_OK; + } + } + } else { + send_rc = nes_nic_send(skb, netdev); + if (send_rc != NETDEV_TX_OK) { + spin_unlock_irqrestore(&nesnic->sq_lock, flags); + return NETDEV_TX_OK; + } + } + + barrier(); + + if (wqe_count) + nes_write32(nesdev->regs+NES_WQE_ALLOC, + (wqe_count << 24) | (1 << 23) | nesvnic->nic.qp_id); + + netdev->trans_start = jiffies; + spin_unlock_irqrestore(&nesnic->sq_lock, flags); + + return NETDEV_TX_OK; +} + + +/** + * nes_netdev_get_stats + */ +static struct net_device_stats *nes_netdev_get_stats(struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + u64 u64temp; + u32 u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_DISCARD + (nesvnic->nic_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->endnode_nstat_rx_discard += u32temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO + (nesvnic->nic_index*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32; + + nesvnic->endnode_nstat_rx_octets += u64temp; + nesvnic->netstats.rx_bytes += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO + (nesvnic->nic_index*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32; + + nesvnic->endnode_nstat_rx_frames += u64temp; + nesvnic->netstats.rx_packets += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO + (nesvnic->nic_index*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32; + + nesvnic->endnode_nstat_tx_octets += u64temp; + nesvnic->netstats.tx_bytes += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO + (nesvnic->nic_index*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32; + + nesvnic->endnode_nstat_tx_frames += u64temp; + nesvnic->netstats.tx_packets += u64temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_short_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_oversized_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_jabber_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_length_errors += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_crc_errors += u32temp; + nesvnic->netstats.rx_crc_errors += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_tx_errors += u32temp; + nesvnic->netstats.tx_errors += u32temp; + + return &nesvnic->netstats; +} + + +/** + * nes_netdev_tx_timeout + */ +static void nes_netdev_tx_timeout(struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + + if (netif_msg_timer(nesvnic)) + nes_debug(NES_DBG_NIC_TX, "%s: tx timeout\n", netdev->name); +} + + +/** + * nes_netdev_set_mac_address + */ +static int nes_netdev_set_mac_address(struct net_device *netdev, void *p) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct sockaddr *mac_addr = p; + int i; + u32 macaddr_low; + u16 macaddr_high; + + if (!is_valid_ether_addr(mac_addr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len); + printk(PFX "%s: Address length = %d, Address = %02X%02X%02X%02X%02X%02X..\n", + __FUNCTION__, netdev->addr_len, + mac_addr->sa_data[0], mac_addr->sa_data[1], + mac_addr->sa_data[2], mac_addr->sa_data[3], + mac_addr->sa_data[4], mac_addr->sa_data[5]); + macaddr_high = ((u16)netdev->dev_addr[0]) << 8; + macaddr_high += (u16)netdev->dev_addr[1]; + macaddr_low = ((u32)netdev->dev_addr[2]) << 24; + macaddr_low += ((u32)netdev->dev_addr[3]) << 16; + macaddr_low += ((u32)netdev->dev_addr[4]) << 8; + macaddr_low += (u32)netdev->dev_addr[5]; + + for (i = 0; i < NES_MAX_PORT_COUNT; i++) { + if (nesvnic->qp_nic_index[i] == 0xf) { + break; + } + nes_write_indexed(nesdev, + NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8), + macaddr_low); + nes_write_indexed(nesdev, + NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8), + (u32)macaddr_high | NES_MAC_ADDR_VALID | + ((((u32)nesvnic->nic_index) << 16))); + } + return 0; +} + + +/** + * nes_netdev_set_multicast_list + */ +void nes_netdev_set_multicast_list(struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct dev_mc_list *multicast_addr; + u32 nic_active_bit; + u32 nic_active; + u32 perfect_filter_register_address; + u32 macaddr_low; + u16 macaddr_high; + u8 mc_all_on = 0; + u8 mc_index; + int mc_nic_index = -1; + + nic_active_bit = 1 << nesvnic->nic_index; + + if (netdev->flags & IFF_PROMISC) { + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + mc_all_on = 1; + } else if ((netdev->flags & IFF_ALLMULTI) || (netdev->mc_count > NES_MULTICAST_PF_MAX) || + (nesvnic->nic_index > 3)) { + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + mc_all_on = 1; + } else { + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + } + + nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscous = %d, All Multicast = %d.\n", + netdev->mc_count, (netdev->flags & IFF_PROMISC)?1:0, + (netdev->flags & IFF_ALLMULTI)?1:0); + if (!mc_all_on) { + multicast_addr = netdev->mc_list; + perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + 0x80; + perfect_filter_register_address += nesvnic->nic_index*0x40; + for (mc_index=0; mc_index < NES_MULTICAST_PF_MAX; mc_index++) { + while (multicast_addr && nesvnic->mcrq_mcast_filter && ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, multicast_addr->dmi_addr)) == 0)) + multicast_addr = multicast_addr->next; + + if (mc_nic_index < 0) + mc_nic_index = nesvnic->nic_index; + if (multicast_addr) { + nes_debug(NES_DBG_NIC_RX, "Assigning MC Address = %02X%02X%02X%02X%02X%02X to register 0x%04X nic_idx=%d\n", + multicast_addr->dmi_addr[0], multicast_addr->dmi_addr[1], + multicast_addr->dmi_addr[2], multicast_addr->dmi_addr[3], + multicast_addr->dmi_addr[4], multicast_addr->dmi_addr[5], + perfect_filter_register_address+(mc_index * 8), mc_nic_index); + macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8; + macaddr_high += (u16)multicast_addr->dmi_addr[1]; + macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24; + macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16; + macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8; + macaddr_low += (u32)multicast_addr->dmi_addr[5]; + nes_write_indexed(nesdev, + perfect_filter_register_address+(mc_index * 8), + macaddr_low); + nes_write_indexed(nesdev, + perfect_filter_register_address+4+(mc_index * 8), + (u32)macaddr_high | NES_MAC_ADDR_VALID | + ((((u32)(1<<mc_nic_index)) << 16))); + multicast_addr = multicast_addr->next; + } else { + nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n", + perfect_filter_register_address+(mc_index * 8)); + nes_write_indexed(nesdev, + perfect_filter_register_address+4+(mc_index * 8), + 0); + } + } + } +} + + +/** + * nes_netdev_change_mtu + */ +static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + int ret = 0; + u8 jumbomode=0; + + if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) + return -EINVAL; + + netdev->mtu = new_mtu; + nesvnic->max_frame_size = new_mtu+ETH_HLEN; + + if (netdev->mtu > 1500) { + jumbomode=1; + } + nes_nic_init_timer_defaults(nesdev, jumbomode); + + if (netif_running(netdev)) { + nes_netdev_stop(netdev); + nes_netdev_open(netdev); + } + + return ret; +} + + +/** + * nes_netdev_exit - destroy network device + */ +void nes_netdev_exit(struct nes_vnic *nesvnic) +{ + struct net_device *netdev = nesvnic->netdev; + struct nes_ib_device *nesibdev = nesvnic->nesibdev; + + nes_debug(NES_DBG_SHUTDOWN, "\n"); + + // destroy the ibdevice if RDMA enabled + if ((nesvnic->rdma_enabled)&&(nesvnic->of_device_registered)) { + nes_destroy_ofa_device( nesibdev ); + nesvnic->of_device_registered = 0; + nesvnic->nesibdev = NULL; + } + unregister_netdev(netdev); + nes_debug(NES_DBG_SHUTDOWN, "\n"); +} + + +#define NES_ETHTOOL_STAT_COUNT 55 +static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] = { + "Link Change Interrupts", + "Linearized SKBs", + "T/GSO Requests", + "Pause Frames Sent", + "Pause Frames Received", + "Internal Routing Errors", + "SQ SW Dropped SKBs", + "SQ Locked", + "SQ Full", + "Segmented TSO Requests", + "Rx Symbol Errors", + "Rx Jabber Errors", + "Rx Oversized Frames", + "Rx Short Frames", + "Endnode Rx Discards", + "Endnode Rx Octets", + "Endnode Rx Frames", + "Endnode Tx Octets", + "Endnode Tx Frames", + "mh detected", + "mh pauses", + "Retransmission Count", + "CM Connects", + "CM Accepts", + "Disconnects", + "Connected Events", + "Connect Requests", + "CM Rejects", + "ModifyQP Timeouts", + "CreateQPs", + "SW DestroyQPs", + "DestroyQPs", + "CM Closes", + "CM Packets Sent", + "CM Packets Bounced", + "CM Packets Created", + "CM Packets Rcvd", + "CM Packets Dropped", + "CM Packets Retrans", + "CM Listens Created", + "CM Listens Destroyed", + "CM Backlog Drops", + "CM Loopbacks", + "CM Nodes Created", + "CM Nodes Destroyed", + "CM Accel Drops", + "CM Resets Received", + "Timer Inits", + "CQ Depth 1", + "CQ Depth 4", + "CQ Depth 16", + "CQ Depth 24", + "CQ Depth 32", + "CQ Depth 128", + "CQ Depth 256", +}; + + +/** + * nes_netdev_get_rx_csum + */ +static u32 nes_netdev_get_rx_csum (struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + + if (nesvnic->rx_checksum_disabled) + return 0; + else + return 1; +} + + +/** + * nes_netdev_set_rc_csum + */ +static int nes_netdev_set_rx_csum(struct net_device *netdev, u32 enable) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + + if (enable) + nesvnic->rx_checksum_disabled = 0; + else + nesvnic->rx_checksum_disabled = 1; + return 0; +} + + +/** + * nes_netdev_get_stats_count + */ +static int nes_netdev_get_stats_count(struct net_device *netdev) +{ + return NES_ETHTOOL_STAT_COUNT; +} + + +/** + * nes_netdev_get_strings + */ +static void nes_netdev_get_strings(struct net_device *netdev, u32 stringset, + u8 *ethtool_strings) +{ + if (stringset == ETH_SS_STATS) + memcpy(ethtool_strings, + &nes_ethtool_stringset, + sizeof(nes_ethtool_stringset)); +} + + +/** + * nes_netdev_get_ethtool_stats + */ +static void nes_netdev_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *target_ethtool_stats, u64 *target_stat_values) +{ + u64 u64temp; + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + u32 nic_count; + u32 u32temp; + + target_ethtool_stats->n_stats = NES_ETHTOOL_STAT_COUNT; + target_stat_values[0] = nesvnic->nesdev->link_status_interrupts; + target_stat_values[1] = nesvnic->linearized_skbs; + target_stat_values[2] = nesvnic->tso_requests; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_TX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_pause_frames_sent += u32temp; + target_stat_values[3] = nesvnic->nesdev->mac_pause_frames_sent; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_pause_frames_received += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_PORT_RX_DISCARDS + (nesvnic->nesdev->mac_index*0x40)); + nesvnic->nesdev->port_rx_discards += u32temp; + nesvnic->netstats.rx_dropped += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_PORT_TX_DISCARDS + (nesvnic->nesdev->mac_index*0x40)); + nesvnic->nesdev->port_tx_discards += u32temp; + nesvnic->netstats.tx_dropped += u32temp; + + for (nic_count = 0; nic_count < NES_MAX_PORT_COUNT; nic_count++) { + if (nesvnic->qp_nic_index[nic_count] == 0xf) + break; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_DISCARD + + (nesvnic->qp_nic_index[nic_count]*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->endnode_nstat_rx_discard += u32temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO + + (nesvnic->qp_nic_index[nic_count]*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI + + (nesvnic->qp_nic_index[nic_count]*0x200))) << 32; + + nesvnic->endnode_nstat_rx_octets += u64temp; + nesvnic->netstats.rx_bytes += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO + + (nesvnic->qp_nic_index[nic_count]*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI + + (nesvnic->qp_nic_index[nic_count]*0x200))) << 32; + + nesvnic->endnode_nstat_rx_frames += u64temp; + nesvnic->netstats.rx_packets += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO + + (nesvnic->qp_nic_index[nic_count]*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI + + (nesvnic->qp_nic_index[nic_count]*0x200))) << 32; + + nesvnic->endnode_nstat_tx_octets += u64temp; + nesvnic->netstats.tx_bytes += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO + + (nesvnic->qp_nic_index[nic_count]*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI + + (nesvnic->qp_nic_index[nic_count]*0x200))) << 32; + + nesvnic->endnode_nstat_tx_frames += u64temp; + nesvnic->netstats.tx_packets += u64temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_IPV4_TCP_REXMITS + (nesvnic->qp_nic_index[nic_count]*0x200)); + nesvnic->endnode_ipv4_tcp_retransmits += u32temp; + } + + target_stat_values[4] = nesvnic->nesdev->mac_pause_frames_received; + target_stat_values[5] = nesdev->nesadapter->nic_rx_eth_route_err; + target_stat_values[6] = nesvnic->tx_sw_dropped; + target_stat_values[7] = nesvnic->sq_locked; + target_stat_values[8] = nesvnic->sq_full; + target_stat_values[9] = nesvnic->segmented_tso_requests; + target_stat_values[10] = nesvnic->nesdev->mac_rx_symbol_err_frames; + target_stat_values[11] = nesvnic->nesdev->mac_rx_jabber_frames; + target_stat_values[12] = nesvnic->nesdev->mac_rx_oversized_frames; + target_stat_values[13] = nesvnic->nesdev->mac_rx_short_frames; + target_stat_values[14] = nesvnic->endnode_nstat_rx_discard; + target_stat_values[15] = nesvnic->endnode_nstat_rx_octets; + target_stat_values[16] = nesvnic->endnode_nstat_rx_frames; + target_stat_values[17] = nesvnic->endnode_nstat_tx_octets; + target_stat_values[18] = nesvnic->endnode_nstat_tx_frames; + target_stat_values[19] = mh_detected; + target_stat_values[20] = mh_pauses_sent; + target_stat_values[21] = nesvnic->endnode_ipv4_tcp_retransmits; + target_stat_values[22] = atomic_read(&cm_connects); + target_stat_values[23] = atomic_read(&cm_accepts); + target_stat_values[24] = atomic_read(&cm_disconnects); + target_stat_values[25] = atomic_read(&cm_connecteds); + target_stat_values[26] = atomic_read(&cm_connect_reqs); + target_stat_values[27] = atomic_read(&cm_rejects); + target_stat_values[28] = atomic_read(&mod_qp_timouts); + target_stat_values[29] = atomic_read(&qps_created); + target_stat_values[30] = atomic_read(&sw_qps_destroyed); + target_stat_values[31] = atomic_read(&qps_destroyed); + target_stat_values[32] = atomic_read(&cm_closes); + target_stat_values[33] = cm_packets_sent; + target_stat_values[34] = cm_packets_bounced; + target_stat_values[35] = cm_packets_created; + target_stat_values[36] = cm_packets_received; + target_stat_values[37] = cm_packets_dropped; + target_stat_values[38] = cm_packets_retrans; + target_stat_values[39] = cm_listens_created; + target_stat_values[40] = cm_listens_destroyed; + target_stat_values[41] = cm_backlog_drops; + target_stat_values[42] = atomic_read(&cm_loopbacks); + target_stat_values[43] = atomic_read(&cm_nodes_created); + target_stat_values[44] = atomic_read(&cm_nodes_destroyed); + target_stat_values[45] = atomic_read(&cm_accel_dropped_pkts); + target_stat_values[46] = atomic_read(&cm_resets_recvd); + target_stat_values[47] = int_mod_timer_init; + target_stat_values[48] = int_mod_cq_depth_1; + target_stat_values[49] = int_mod_cq_depth_4; + target_stat_values[50] = int_mod_cq_depth_16; + target_stat_values[51] = int_mod_cq_depth_24; + target_stat_values[52] = int_mod_cq_depth_32; + target_stat_values[53] = int_mod_cq_depth_128; + target_stat_values[54] = int_mod_cq_depth_256; + +} + + +/** + * nes_netdev_get_drvinfo + */ +static void nes_netdev_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + + strcpy(drvinfo->driver, DRV_NAME); + strcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev)); + strcpy(drvinfo->fw_version, "TBD"); + strcpy(drvinfo->version, DRV_VERSION); + drvinfo->n_stats = nes_netdev_get_stats_count(netdev); + drvinfo->testinfo_len = 0; + drvinfo->eedump_len = 0; + drvinfo->regdump_len = 0; +} + + +/** + * nes_netdev_set_coalesce + */ +static int nes_netdev_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *et_coalesce) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + unsigned long flags; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + if (et_coalesce->rx_max_coalesced_frames_low) { + shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low; + } + if (et_coalesce->rx_max_coalesced_frames_irq) { + shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq; + } + if (et_coalesce->rx_max_coalesced_frames_high) { + shared_timer->threshold_high = et_coalesce->rx_max_coalesced_frames_high; + } + if (et_coalesce->rx_coalesce_usecs_low) { + shared_timer->timer_in_use_min = et_coalesce->rx_coalesce_usecs_low; + } + if (et_coalesce->rx_coalesce_usecs_high) { + shared_timer->timer_in_use_max = et_coalesce->rx_coalesce_usecs_high; + } + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); + + /* using this to drive total interrupt moderation */ + nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq; + if (et_coalesce->use_adaptive_rx_coalesce) { + nesadapter->et_use_adaptive_rx_coalesce = 1; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; + nesadapter->et_rx_coalesce_usecs_irq = 0; + if (et_coalesce->pkt_rate_low) { + nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low; + } + } else { + nesadapter->et_use_adaptive_rx_coalesce = 0; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; + if (nesadapter->et_rx_coalesce_usecs_irq) { + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, + 0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8))); + } + } + return 0; +} + + +/** + * nes_netdev_get_coalesce + */ +static int nes_netdev_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *et_coalesce) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct ethtool_coalesce temp_et_coalesce; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + unsigned long flags; + + memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce)); + temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq; + temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce; + temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval; + temp_et_coalesce.pkt_rate_low = nesadapter->et_pkt_rate_low; + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low; + temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target; + temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high; + temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min; + temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max; + if (nesadapter->et_use_adaptive_rx_coalesce) { + temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use; + } + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); + memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce)); + return 0; +} + + +/** + * nes_netdev_get_pauseparam + */ +static void nes_netdev_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *et_pauseparam) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + + et_pauseparam->autoneg = 0; + et_pauseparam->rx_pause = (nesvnic->nesdev->disable_rx_flow_control == 0) ? 1:0; + et_pauseparam->tx_pause = (nesvnic->nesdev->disable_tx_flow_control == 0) ? 1:0; +} + + +/** + * nes_netdev_set_pauseparam + */ +static int nes_netdev_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *et_pauseparam) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + u32 u32temp; + + if (et_pauseparam->autoneg) { + /* TODO: should return unsupported */ + return 0; + } + if ((et_pauseparam->tx_pause == 1) && (nesdev->disable_tx_flow_control == 1)) { + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); + u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; + nes_write_indexed(nesdev, + NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp); + nesdev->disable_tx_flow_control = 0; + } else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) { + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); + u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; + nes_write_indexed(nesdev, + NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp); + nesdev->disable_tx_flow_control = 1; + } + if ((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) { + u32temp = nes_read_indexed(nesdev, + NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40)); + u32temp &= ~NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE; + nes_write_indexed(nesdev, + NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp); + nesdev->disable_rx_flow_control = 0; + } else if ((et_pauseparam->rx_pause == 0) && (nesdev->disable_rx_flow_control == 0)) { + u32temp = nes_read_indexed(nesdev, + NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40)); + u32temp |= NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE; + nes_write_indexed(nesdev, + NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp); + nesdev->disable_rx_flow_control = 1; + } + + return 0; +} + + +/** + * nes_netdev_get_settings + */ +static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u16 phy_data; + + et_cmd->duplex = DUPLEX_FULL; + et_cmd->port = PORT_MII; + if (nesadapter->OneG_Mode) { + et_cmd->supported = SUPPORTED_1000baseT_Full|SUPPORTED_Autoneg; + et_cmd->advertising = ADVERTISED_1000baseT_Full|ADVERTISED_Autoneg; + et_cmd->speed = SPEED_1000; + nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], + &phy_data); + if (phy_data&0x1000) { + et_cmd->autoneg = AUTONEG_ENABLE; + } else { + et_cmd->autoneg = AUTONEG_DISABLE; + } + et_cmd->transceiver = XCVR_EXTERNAL; + et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; + } else { + if (nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) { + et_cmd->transceiver = XCVR_EXTERNAL; + et_cmd->port = PORT_FIBRE; + et_cmd->supported = SUPPORTED_FIBRE; + et_cmd->advertising = ADVERTISED_FIBRE; + et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; + } else { + et_cmd->transceiver = XCVR_INTERNAL; + et_cmd->supported = SUPPORTED_10000baseT_Full; + et_cmd->advertising = ADVERTISED_10000baseT_Full; + et_cmd->phy_address = nesdev->mac_index; + } + et_cmd->speed = SPEED_10000; + et_cmd->autoneg = AUTONEG_DISABLE; + } + et_cmd->maxtxpkt = 511; + et_cmd->maxrxpkt = 511; + return 0; +} + + +/** + * nes_netdev_set_settings + */ +static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u16 phy_data; + + if (nesadapter->OneG_Mode) { + nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], + &phy_data); + if (et_cmd->autoneg) { + /* Turn on Full duplex, Autoneg, and restart autonegotiation */ + phy_data |= 0x1300; + } else { + // Turn off autoneg + phy_data &= ~0x1000; + } + nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], + phy_data); + } + + return 0; +} + + +static struct ethtool_ops nes_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_settings = nes_netdev_get_settings, + .set_settings = nes_netdev_set_settings, + .get_tx_csum = ethtool_op_get_tx_csum, + .get_rx_csum = nes_netdev_get_rx_csum, + .get_sg = ethtool_op_get_sg, + .get_strings = nes_netdev_get_strings, + .get_stats_count = nes_netdev_get_stats_count, + .get_ethtool_stats = nes_netdev_get_ethtool_stats, + .get_drvinfo = nes_netdev_get_drvinfo, + .get_coalesce = nes_netdev_get_coalesce, + .set_coalesce = nes_netdev_set_coalesce, + .get_pauseparam = nes_netdev_get_pauseparam, + .set_pauseparam = nes_netdev_set_pauseparam, + .set_tx_csum = ethtool_op_set_tx_csum, + .set_rx_csum = nes_netdev_set_rx_csum, + .set_sg = ethtool_op_set_sg, + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, +}; + + +static void nes_netdev_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + u32 u32temp; + + nesvnic->vlan_grp = grp; + + /* Enable/Disable VLAN Stripping */ + u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG); + if (grp) + u32temp &= 0xfdffffff; + else + u32temp |= 0x02000000; + + nes_write_indexed(nesdev, NES_IDX_PCIX_DIAG, u32temp); +} + + +/** + * nes_netdev_init - initialize network device + */ +struct net_device *nes_netdev_init(struct nes_device *nesdev, + void __iomem *mmio_addr) +{ + u64 u64temp; + struct nes_vnic *nesvnic = NULL; + struct net_device *netdev; + struct nic_qp_map *curr_qp_map; + u32 u32temp; + u16 phy_data; + u16 temp_phy_data; + + netdev = alloc_etherdev(sizeof(struct nes_vnic)); + if (!netdev) { + printk(KERN_ERR PFX "nesvnic etherdev alloc failed"); + return NULL; + } + + nes_debug(NES_DBG_INIT, "netdev = %p, %s\n", netdev, netdev->name); + + SET_NETDEV_DEV(netdev, &nesdev->pcidev->dev); + + nesvnic = netdev_priv(netdev); + memset(nesvnic, 0, sizeof(*nesvnic)); + + netdev->open = nes_netdev_open; + netdev->stop = nes_netdev_stop; + netdev->hard_start_xmit = nes_netdev_start_xmit; + netdev->get_stats = nes_netdev_get_stats; + netdev->tx_timeout = nes_netdev_tx_timeout; + netdev->set_mac_address = nes_netdev_set_mac_address; + netdev->set_multicast_list = nes_netdev_set_multicast_list; + netdev->change_mtu = nes_netdev_change_mtu; + netdev->watchdog_timeo = NES_TX_TIMEOUT; + netdev->irq = nesdev->pcidev->irq; + netdev->mtu = ETH_DATA_LEN; + netdev->hard_header_len = ETH_HLEN; + netdev->addr_len = ETH_ALEN; + netdev->type = ARPHRD_ETHER; + netdev->features = NETIF_F_HIGHDMA; + netdev->ethtool_ops = &nes_ethtool_ops; + netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128); + nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n"); + netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + netdev->vlan_rx_register = nes_netdev_vlan_rx_register; + netdev->features |= NETIF_F_LLTX; + + /* Fill in the port structure */ + nesvnic->netdev = netdev; + nesvnic->nesdev = nesdev; + nesvnic->msg_enable = netif_msg_init(debug, default_msg); + nesvnic->netdev_index = nesdev->netdev_count; + nesvnic->perfect_filter_index = nesdev->nesadapter->netdev_count; + nesvnic->max_frame_size = netdev->mtu+netdev->hard_header_len; + + curr_qp_map = nic_qp_mapping_per_function[PCI_FUNC(nesdev->pcidev->devfn)]; + nesvnic->nic.qp_id = curr_qp_map[nesdev->netdev_count].qpid; + nesvnic->nic_index = curr_qp_map[nesdev->netdev_count].nic_index; + nesvnic->logical_port = curr_qp_map[nesdev->netdev_count].logical_port; + + /* Setup the burned in MAC address */ + u64temp = (u64)nesdev->nesadapter->mac_addr_low; + u64temp += ((u64)nesdev->nesadapter->mac_addr_high) << 32; + u64temp += nesvnic->nic_index; + netdev->dev_addr[0] = (u8)(u64temp>>40); + netdev->dev_addr[1] = (u8)(u64temp>>32); + netdev->dev_addr[2] = (u8)(u64temp>>24); + netdev->dev_addr[3] = (u8)(u64temp>>16); + netdev->dev_addr[4] = (u8)(u64temp>>8); + netdev->dev_addr[5] = (u8)u64temp; + memcpy(netdev->perm_addr, netdev->dev_addr, 6); + + if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) { + netdev->features |= NETIF_F_TSO | NETIF_F_SG | NETIF_F_IP_CSUM; + netdev->features |= NETIF_F_GSO | NETIF_F_TSO | NETIF_F_SG | NETIF_F_IP_CSUM; + } else { + netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; + } + + nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," + " nic_index = %d, logical_port = %d, mac_index = %d.\n", + nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id, + nesvnic->nic_index, nesvnic->logical_port, nesdev->mac_index); + + if (nesvnic->nesdev->nesadapter->port_count == 1) { + nesvnic->qp_nic_index[0] = nesvnic->nic_index; + nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1; + if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) { + nesvnic->qp_nic_index[2] = 0xf; + nesvnic->qp_nic_index[3] = 0xf; + } else { + nesvnic->qp_nic_index[2] = nesvnic->nic_index + 2; + nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3; + } + } else { + if (nesvnic->nesdev->nesadapter->port_count == 2) { + nesvnic->qp_nic_index[0] = nesvnic->nic_index; + nesvnic->qp_nic_index[1] = nesvnic->nic_index + 2; + nesvnic->qp_nic_index[2] = 0xf; + nesvnic->qp_nic_index[3] = 0xf; + } else { + nesvnic->qp_nic_index[0] = nesvnic->nic_index; + nesvnic->qp_nic_index[1] = 0xf; + nesvnic->qp_nic_index[2] = 0xf; + nesvnic->qp_nic_index[3] = 0xf; + } + } + nesvnic->next_qp_nic_index = 0; + + if (nesdev->netdev_count == 0) { + nesvnic->rdma_enabled = 1; + } else { + nesvnic->rdma_enabled = 0; + } + nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id; + spin_lock_init(&nesvnic->tx_lock); + nesdev->netdev[nesdev->netdev_count] = netdev; + + nes_debug(NES_DBG_INIT, "Adding nesvnic (%p) to the adapters nesvnic_list for MAC%d.\n", + nesvnic, nesdev->mac_index); + list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]); + + if ((nesdev->netdev_count == 0) && + (PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index)) { + nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n", + NES_IDX_PHY_PCS_CONTROL_STATUS0+(0x200*(nesvnic->logical_port&1))); + u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200*(nesvnic->logical_port&1))); + u32temp |= 0x00200000; + nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200*(nesvnic->logical_port&1)), u32temp); + u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200*(nesvnic->logical_port&1)) ); + if ((u32temp&0x0f1f0000) == 0x0f0f0000) { + if (nesdev->nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) { + nes_init_phy(nesdev); + nes_read_10G_phy_reg(nesdev, 1, + nesdev->nesadapter->phy_index[nesvnic->logical_port]); + temp_phy_data = (u16)nes_read_indexed(nesdev, + NES_IDX_MAC_MDIO_CONTROL); + u32temp = 20; + do { + nes_read_10G_phy_reg(nesdev, 1, + nesdev->nesadapter->phy_index[nesvnic->logical_port]); + phy_data = (u16)nes_read_indexed(nesdev, + NES_IDX_MAC_MDIO_CONTROL); + if ((phy_data == temp_phy_data) || (!(--u32temp))) + break; + temp_phy_data = phy_data; + } while (1); + if (phy_data & 4) { + nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); + nesvnic->linkup = 1; + } else { + nes_debug(NES_DBG_INIT, "The Link is DOWN!!.\n"); + } + } else { + nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); + nesvnic->linkup = 1; + } + } + nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n"); + /* clear the MAC interrupt status, assumes direct logical to physical mapping */ + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS+(0x200*nesvnic->logical_port)); + nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp); + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS+(0x200*nesvnic->logical_port), u32temp); + + if (nesdev->nesadapter->phy_type[nesvnic->logical_port] != NES_PHY_TYPE_IRIS) + nes_init_phy(nesdev); + + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesvnic->logical_port), + ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | + NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); + } + + return netdev; +} + + +/** + * nes_netdev_destroy - destroy network device structure + */ +void nes_netdev_destroy(struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + + /* make sure 'stop' method is called by Linux stack */ + /* nes_netdev_stop(netdev); */ + + list_del(&nesvnic->list); + + if (nesvnic->of_device_registered) { + nes_destroy_ofa_device(nesvnic->nesibdev); + } + + free_netdev(netdev); +} + + +/** + * nes_nic_cm_xmit -- CM calls this to send out pkts + */ +int nes_nic_cm_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + int ret; + + skb->dev = netdev; + ret = dev_queue_xmit(skb); + if (ret) { + nes_debug(NES_DBG_CM, "Bad return code from dev_queue_xmit %d\n", ret); + } + + return ret; +} diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h new file mode 100644 index 000000000000..e64306bce80b --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_user.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect. All rights reserved. + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef NES_USER_H +#define NES_USER_H + +#include <linux/types.h> + +#define NES_ABI_USERSPACE_VER 1 +#define NES_ABI_KERNEL_VER 1 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct nes_alloc_ucontext_req { + __u32 reserved32; + __u8 userspace_ver; + __u8 reserved8[3]; +}; + +struct nes_alloc_ucontext_resp { + __u32 max_pds; /* maximum pds allowed for this user process */ + __u32 max_qps; /* maximum qps allowed for this user process */ + __u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmaped area */ + __u8 virtwq; /* flag to indicate if virtual WQ are to be used or not */ + __u8 kernel_ver; + __u8 reserved[2]; +}; + +struct nes_alloc_pd_resp { + __u32 pd_id; + __u32 mmap_db_index; +}; + +struct nes_create_cq_req { + __u64 user_cq_buffer; + __u32 mcrqf; + __u8 reserved[4]; +}; + +struct nes_create_qp_req { + __u64 user_wqe_buffers; +}; + +enum iwnes_memreg_type { + IWNES_MEMREG_TYPE_MEM = 0x0000, + IWNES_MEMREG_TYPE_QP = 0x0001, + IWNES_MEMREG_TYPE_CQ = 0x0002, + IWNES_MEMREG_TYPE_MW = 0x0003, + IWNES_MEMREG_TYPE_FMR = 0x0004, +}; + +struct nes_mem_reg_req { + __u32 reg_type; /* indicates if id is memory, QP or CQ */ + __u32 reserved; +}; + +struct nes_create_cq_resp { + __u32 cq_id; + __u32 cq_size; + __u32 mmap_db_index; + __u32 reserved; +}; + +struct nes_create_qp_resp { + __u32 qp_id; + __u32 actual_sq_size; + __u32 actual_rq_size; + __u32 mmap_sq_db_index; + __u32 mmap_rq_db_index; + __u32 nes_drv_opt; +}; + +#endif /* NES_USER_H */ diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c new file mode 100644 index 000000000000..c4ec6ac63461 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -0,0 +1,917 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/mii.h> +#include <linux/if_vlan.h> +#include <linux/crc32.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/byteorder.h> + +#include "nes.h" + + + +static u16 nes_read16_eeprom(void __iomem *addr, u16 offset); + +u32 mh_detected; +u32 mh_pauses_sent; + +/** + * nes_read_eeprom_values - + */ +int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesadapter) +{ + u32 mac_addr_low; + u16 mac_addr_high; + u16 eeprom_data; + u16 eeprom_offset; + u16 next_section_address; + u16 sw_section_ver; + u8 major_ver = 0; + u8 minor_ver = 0; + + /* TODO: deal with EEPROM endian issues */ + if (nesadapter->firmware_eeprom_offset == 0) { + /* Read the EEPROM Parameters */ + eeprom_data = nes_read16_eeprom(nesdev->regs, 0); + nes_debug(NES_DBG_HW, "EEPROM Offset 0 = 0x%04X\n", eeprom_data); + eeprom_offset = 2 + (((eeprom_data & 0x007f) << 3) << + ((eeprom_data & 0x0080) >> 7)); + nes_debug(NES_DBG_HW, "Firmware Offset = 0x%04X\n", eeprom_offset); + nesadapter->firmware_eeprom_offset = eeprom_offset; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4); + if (eeprom_data != 0x5746) { + nes_debug(NES_DBG_HW, "Not a valid Firmware Image = 0x%04X\n", eeprom_data); + return -1; + } + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + eeprom_offset += ((eeprom_data & 0x00ff) << 3) << ((eeprom_data & 0x0100) >> 8); + nes_debug(NES_DBG_HW, "Software Offset = 0x%04X\n", eeprom_offset); + nesadapter->software_eeprom_offset = eeprom_offset; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4); + if (eeprom_data != 0x5753) { + printk("Not a valid Software Image = 0x%04X\n", eeprom_data); + return -1; + } + sw_section_ver = nes_read16_eeprom(nesdev->regs, nesadapter->software_eeprom_offset + 6); + nes_debug(NES_DBG_HW, "Software section version number = 0x%04X\n", + sw_section_ver); + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) << + ((eeprom_data & 0x0100) >> 8)); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x414d) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_offset = next_section_address; + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) << + ((eeprom_data & 0x0100) >> 8)); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x4f52) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x4f52 but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_offset = next_section_address; + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x5746) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5746 but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_offset = next_section_address; + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x5753) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5753 but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_offset = next_section_address; + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x414d) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_offset = next_section_address; + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x464e) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x464e but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 8); + printk(PFX "Firmware version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data); + major_ver = (u8)(eeprom_data >> 8); + minor_ver = (u8)(eeprom_data); + + if (nes_drv_opt & NES_DRV_OPT_DISABLE_VIRT_WQ) { + nes_debug(NES_DBG_HW, "Virtual WQs have been disabled\n"); + } else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) { + nesadapter->virtwq = 1; + } + nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) + + (u32)((u8)eeprom_data); + +no_fw_rev: + /* eeprom is valid */ + eeprom_offset = nesadapter->software_eeprom_offset; + eeprom_offset += 8; + nesadapter->netdev_max = (u8)nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + mac_addr_high = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + mac_addr_low = (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + mac_addr_low <<= 16; + mac_addr_low += (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "Base MAC Address = 0x%04X%08X\n", + mac_addr_high, mac_addr_low); + nes_debug(NES_DBG_HW, "MAC Address count = %u\n", nesadapter->netdev_max); + + nesadapter->mac_addr_low = mac_addr_low; + nesadapter->mac_addr_high = mac_addr_high; + + /* Read the Phy Type array */ + eeprom_offset += 10; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->phy_type[0] = (u8)(eeprom_data >> 8); + nesadapter->phy_type[1] = (u8)eeprom_data; + + /* Read the port array */ + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->phy_type[2] = (u8)(eeprom_data >> 8); + nesadapter->phy_type[3] = (u8)eeprom_data; + /* port_count is set by soft reset reg */ + nes_debug(NES_DBG_HW, "port_count = %u, port 0 -> %u, port 1 -> %u," + " port 2 -> %u, port 3 -> %u\n", + nesadapter->port_count, + nesadapter->phy_type[0], nesadapter->phy_type[1], + nesadapter->phy_type[2], nesadapter->phy_type[3]); + + /* Read PD config array */ + eeprom_offset += 10; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_size[0] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[0] = eeprom_data; + nes_debug(NES_DBG_HW, "PD0 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[0], nesadapter->pd_config_base[0]); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_size[1] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[1] = eeprom_data; + nes_debug(NES_DBG_HW, "PD1 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[1], nesadapter->pd_config_base[1]); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_size[2] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[2] = eeprom_data; + nes_debug(NES_DBG_HW, "PD2 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[2], nesadapter->pd_config_base[2]); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_size[3] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[3] = eeprom_data; + nes_debug(NES_DBG_HW, "PD3 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[3], nesadapter->pd_config_base[3]); + + /* Read Rx Pool Size */ + eeprom_offset += 22; /* 46 */ + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->rx_pool_size = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "rx_pool_size = 0x%08X\n", nesadapter->rx_pool_size); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->tx_pool_size = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "tx_pool_size = 0x%08X\n", nesadapter->tx_pool_size); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->rx_threshold = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "rx_threshold = 0x%08X\n", nesadapter->rx_threshold); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->tcp_timer_core_clk_divisor = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "tcp_timer_core_clk_divisor = 0x%08X\n", + nesadapter->tcp_timer_core_clk_divisor); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->iwarp_config = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "iwarp_config = 0x%08X\n", nesadapter->iwarp_config); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->cm_config = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "cm_config = 0x%08X\n", nesadapter->cm_config); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->sws_timer_config = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "sws_timer_config = 0x%08X\n", nesadapter->sws_timer_config); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->tcp_config1 = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "tcp_config1 = 0x%08X\n", nesadapter->tcp_config1); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->wqm_wat = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "wqm_wat = 0x%08X\n", nesadapter->wqm_wat); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->core_clock = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "core_clock = 0x%08X\n", nesadapter->core_clock); + + if ((sw_section_ver) && (nesadapter->hw_rev != NE020_REV)) { + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->phy_index[0] = (eeprom_data & 0xff00)>>8; + nesadapter->phy_index[1] = eeprom_data & 0x00ff; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->phy_index[2] = (eeprom_data & 0xff00)>>8; + nesadapter->phy_index[3] = eeprom_data & 0x00ff; + } else { + nesadapter->phy_index[0] = 4; + nesadapter->phy_index[1] = 5; + nesadapter->phy_index[2] = 6; + nesadapter->phy_index[3] = 7; + } + nes_debug(NES_DBG_HW, "Phy address map = 0 > %u, 1 > %u, 2 > %u, 3 > %u\n", + nesadapter->phy_index[0],nesadapter->phy_index[1], + nesadapter->phy_index[2],nesadapter->phy_index[3]); + } + + return 0; +} + + +/** + * nes_read16_eeprom + */ +static u16 nes_read16_eeprom(void __iomem *addr, u16 offset) +{ + writel(NES_EEPROM_READ_REQUEST + (offset >> 1), + (void __iomem *)addr + NES_EEPROM_COMMAND); + + do { + } while (readl((void __iomem *)addr + NES_EEPROM_COMMAND) & + NES_EEPROM_READ_REQUEST); + + return readw((void __iomem *)addr + NES_EEPROM_DATA); +} + + +/** + * nes_write_1G_phy_reg + */ +void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 data) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 u32temp; + u32 counter; + unsigned long flags; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x50020000 | data | ((u32)phy_reg << 18) | ((u32)phy_addr << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */ + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); + + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); +} + + +/** + * nes_read_1G_phy_reg + * This routine only issues the read, the data must be read + * separately. + */ +void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 *data) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 u32temp; + u32 counter; + unsigned long flags; + + /* nes_debug(NES_DBG_PHY, "phy addr = %d, mac_index = %d\n", + phy_addr, nesdev->mac_index); */ + spin_lock_irqsave(&nesadapter->phy_lock, flags); + + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x60020000 | ((u32)phy_reg << 18) | ((u32)phy_addr << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */ + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) { + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); + *data = 0xffff; + } else { + *data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + } + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); +} + + +/** + * nes_write_10G_phy_reg + */ +void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, + u8 phy_addr, u16 data) +{ + u32 dev_addr; + u32 port_addr; + u32 u32temp; + u32 counter; + + dev_addr = 1; + port_addr = phy_addr; + + /* set address */ + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); + + /* set data */ + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x10020000 | (u32)data | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); +} + + +/** + * nes_read_10G_phy_reg + * This routine only issues the read, the data must be read + * separately. + */ +void nes_read_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, u8 phy_addr) +{ + u32 dev_addr; + u32 port_addr; + u32 u32temp; + u32 counter; + + dev_addr = 1; + port_addr = phy_addr; + + /* set address */ + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); + + /* issue read */ + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x30020000 | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); +} + + +/** + * nes_get_cqp_request + */ +struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev) +{ + unsigned long flags; + struct nes_cqp_request *cqp_request = NULL; + + if (!list_empty(&nesdev->cqp_avail_reqs)) { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + cqp_request = list_entry(nesdev->cqp_avail_reqs.next, + struct nes_cqp_request, list); + list_del_init(&cqp_request->list); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } else { + cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL); + if (cqp_request) { + cqp_request->dynamic = 1; + INIT_LIST_HEAD(&cqp_request->list); + } + } + + if (cqp_request) { + init_waitqueue_head(&cqp_request->waitq); + cqp_request->waiting = 0; + cqp_request->request_done = 0; + cqp_request->callback = 0; + init_waitqueue_head(&cqp_request->waitq); + nes_debug(NES_DBG_CQP, "Got cqp request %p from the available list \n", + cqp_request); + } else + printk(KERN_ERR PFX "%s: Could not allocated a CQP request.\n", + __FUNCTION__); + + return cqp_request; +} + + +/** + * nes_post_cqp_request + */ +void nes_post_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request, int ring_doorbell) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + unsigned long flags; + u32 cqp_head; + u64 u64temp; + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + + if (((((nesdev->cqp.sq_tail+(nesdev->cqp.sq_size*2))-nesdev->cqp.sq_head) & + (nesdev->cqp.sq_size - 1)) != 1) + && (list_empty(&nesdev->cqp_pending_reqs))) { + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); + barrier(); + u64temp = (unsigned long)cqp_request; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_SCRATCH_LOW_IDX, + u64temp); + nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ," + " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u," + " waiting = %d, refcount = %d.\n", + le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, + le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request, + nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, + cqp_request->waiting, atomic_read(&cqp_request->refcount)); + barrier(); + if (ring_doorbell) { + /* Ring doorbell (1 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); + } + + barrier(); + } else { + nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X), line 1 = 0x%08X" + " put on the pending queue.\n", + cqp_request, + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_ID_IDX])); + list_add_tail(&cqp_request->list, &nesdev->cqp_pending_reqs); + } + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + + return; +} + + +/** + * nes_arp_table + */ +int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 action) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + int arp_index; + int err = 0; + + for (arp_index = 0; (u32) arp_index < nesadapter->arp_table_size; arp_index++) { + if (nesadapter->arp_table[arp_index].ip_addr == ip_addr) + break; + } + + if (action == NES_ARP_ADD) { + if (arp_index != nesadapter->arp_table_size) { + return -1; + } + + arp_index = 0; + err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps, + nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index); + if (err) { + nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err); + return err; + } + nes_debug(NES_DBG_NETDEV, "ADD, arp_index=%d\n", arp_index); + + nesadapter->arp_table[arp_index].ip_addr = ip_addr; + memcpy(nesadapter->arp_table[arp_index].mac_addr, mac_addr, ETH_ALEN); + return arp_index; + } + + /* DELETE or RESOLVE */ + if (arp_index == nesadapter->arp_table_size) { + nes_debug(NES_DBG_NETDEV, "mac address not in ARP table - cannot delete or resolve\n"); + return -1; + } + + if (action == NES_ARP_RESOLVE) { + nes_debug(NES_DBG_NETDEV, "RESOLVE, arp_index=%d\n", arp_index); + return arp_index; + } + + if (action == NES_ARP_DELETE) { + nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index); + nesadapter->arp_table[arp_index].ip_addr = 0; + memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN); + nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index); + return arp_index; + } + + return -1; +} + + +/** + * nes_mh_fix + */ +void nes_mh_fix(unsigned long parm) +{ + unsigned long flags; + struct nes_device *nesdev = (struct nes_device *)parm; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_vnic *nesvnic; + u32 used_chunks_tx; + u32 temp_used_chunks_tx; + u32 temp_last_used_chunks_tx; + u32 used_chunks_mask; + u32 mac_tx_frames_low; + u32 mac_tx_frames_high; + u32 mac_tx_pauses; + u32 serdes_status; + u32 reset_value; + u32 tx_control; + u32 tx_config; + u32 tx_pause_quanta; + u32 rx_control; + u32 rx_config; + u32 mac_exact_match; + u32 mpp_debug; + u32 i=0; + u32 chunks_tx_progress = 0; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + if ((nesadapter->mac_sw_state[0] != NES_MAC_SW_IDLE) || (nesadapter->mac_link_down[0])) { + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + goto no_mh_work; + } + nesadapter->mac_sw_state[0] = NES_MAC_SW_MH; + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + do { + mac_tx_frames_low = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_LOW); + mac_tx_frames_high = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_HIGH); + mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES); + used_chunks_tx = nes_read_indexed(nesdev, NES_IDX_USED_CHUNKS_TX); + nesdev->mac_pause_frames_sent += mac_tx_pauses; + used_chunks_mask = 0; + temp_used_chunks_tx = used_chunks_tx; + temp_last_used_chunks_tx = nesdev->last_used_chunks_tx; + + if (nesdev->netdev[0]) { + nesvnic = netdev_priv(nesdev->netdev[0]); + } else { + break; + } + + for (i=0; i<4; i++) { + used_chunks_mask <<= 8; + if (nesvnic->qp_nic_index[i] != 0xff) { + used_chunks_mask |= 0xff; + if ((temp_used_chunks_tx&0xff)<(temp_last_used_chunks_tx&0xff)) { + chunks_tx_progress = 1; + } + } + temp_used_chunks_tx >>= 8; + temp_last_used_chunks_tx >>= 8; + } + if ((mac_tx_frames_low) || (mac_tx_frames_high) || + (!(used_chunks_tx&used_chunks_mask)) || + (!(nesdev->last_used_chunks_tx&used_chunks_mask)) || + (chunks_tx_progress) ) { + nesdev->last_used_chunks_tx = used_chunks_tx; + break; + } + nesdev->last_used_chunks_tx = used_chunks_tx; + barrier(); + + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000005); + mh_pauses_sent++; + mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES); + if (mac_tx_pauses) { + nesdev->mac_pause_frames_sent += mac_tx_pauses; + break; + } + + tx_control = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONTROL); + tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + tx_pause_quanta = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA); + rx_control = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONTROL); + rx_config = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONFIG); + mac_exact_match = nes_read_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM); + mpp_debug = nes_read_indexed(nesdev, NES_IDX_MPP_DEBUG); + + /* one last ditch effort to avoid a false positive */ + mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES); + if (mac_tx_pauses) { + nesdev->last_mac_tx_pauses = nesdev->mac_pause_frames_sent; + nes_debug(NES_DBG_HW, "failsafe caught slow outbound pause\n"); + break; + } + mh_detected++; + + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, 0x00000000); + reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); + + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value | 0x0000001d); + + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) + & 0x00000040) != 0x00000040) && (i++ < 5000)) { + /* mdelay(1); */ + } + + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008); + serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0); + + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000); + if (nesadapter->OneG_Mode) { + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222); + } else { + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222); + } + serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_STATUS0); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff); + + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, tx_control); + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + nes_write_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA, tx_pause_quanta); + nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONTROL, rx_control); + nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONFIG, rx_config); + nes_write_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM, mac_exact_match); + nes_write_indexed(nesdev, NES_IDX_MPP_DEBUG, mpp_debug); + + } while (0); + + nesadapter->mac_sw_state[0] = NES_MAC_SW_IDLE; +no_mh_work: + nesdev->nesadapter->mh_timer.expires = jiffies + (HZ/5); + add_timer(&nesdev->nesadapter->mh_timer); +} + +/** + * nes_clc + */ +void nes_clc(unsigned long parm) +{ + unsigned long flags; + struct nes_device *nesdev = (struct nes_device *)parm; + struct nes_adapter *nesadapter = nesdev->nesadapter; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nesadapter->link_interrupt_count[0] = 0; + nesadapter->link_interrupt_count[1] = 0; + nesadapter->link_interrupt_count[2] = 0; + nesadapter->link_interrupt_count[3] = 0; + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + + nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */ + add_timer(&nesadapter->lc_timer); +} + + +/** + * nes_dump_mem + */ +void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length) +{ + char xlate[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f'}; + char *ptr; + char hex_buf[80]; + char ascii_buf[20]; + int num_char; + int num_ascii; + int num_hex; + + if (!(nes_debug_level & dump_debug_level)) { + return; + } + + ptr = addr; + if (length > 0x100) { + nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100); + length = 0x100; + } + nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", ptr, length, length); + + memset(ascii_buf, 0, 20); + memset(hex_buf, 0, 80); + + num_ascii = 0; + num_hex = 0; + for (num_char = 0; num_char < length; num_char++) { + if (num_ascii == 8) { + ascii_buf[num_ascii++] = ' '; + hex_buf[num_hex++] = '-'; + hex_buf[num_hex++] = ' '; + } + + if (*ptr < 0x20 || *ptr > 0x7e) + ascii_buf[num_ascii++] = '.'; + else + ascii_buf[num_ascii++] = *ptr; + hex_buf[num_hex++] = xlate[((*ptr & 0xf0) >> 4)]; + hex_buf[num_hex++] = xlate[*ptr & 0x0f]; + hex_buf[num_hex++] = ' '; + ptr++; + + if (num_ascii >= 17) { + /* output line and reset */ + nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf); + memset(ascii_buf, 0, 20); + memset(hex_buf, 0, 80); + num_ascii = 0; + num_hex = 0; + } + } + + /* output the rest */ + if (num_ascii) { + while (num_ascii < 17) { + if (num_ascii == 8) { + hex_buf[num_hex++] = ' '; + hex_buf[num_hex++] = ' '; + } + hex_buf[num_hex++] = ' '; + hex_buf[num_hex++] = ' '; + hex_buf[num_hex++] = ' '; + num_ascii++; + } + + nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf); + } +} diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c new file mode 100644 index 000000000000..ffd4b425567f --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -0,0 +1,3917 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/random.h> +#include <linux/highmem.h> +#include <asm/byteorder.h> + +#include <rdma/ib_verbs.h> +#include <rdma/iw_cm.h> +#include <rdma/ib_user_verbs.h> + +#include "nes.h" + +#include <rdma/ib_umem.h> + +atomic_t mod_qp_timouts; +atomic_t qps_created; +atomic_t sw_qps_destroyed; + + +/** + * nes_alloc_mw + */ +static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { + unsigned long flags; + struct nes_pd *nespd = to_nespd(ibpd); + struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_cqp_request *cqp_request; + struct nes_mr *nesmr; + struct ib_mw *ibmw; + struct nes_hw_cqp_wqe *cqp_wqe; + int ret; + u32 stag; + u32 stag_index = 0; + u32 next_stag_index = 0; + u32 driver_key = 0; + u8 stag_key = 0; + + get_random_bytes(&next_stag_index, sizeof(next_stag_index)); + stag_key = (u8)next_stag_index; + + driver_key = 0; + + next_stag_index >>= 8; + next_stag_index %= nesadapter->max_mr; + + ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, + nesadapter->max_mr, &stag_index, &next_stag_index); + if (ret) { + return ERR_PTR(ret); + } + + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); + if (!nesmr) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + return ERR_PTR(-ENOMEM); + } + + stag = stag_index << 8; + stag |= driver_key; + stag += (u32)stag_key; + + nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n", + stag, stag_index); + + /* Register the region with the adapter */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + kfree(nesmr); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + return ERR_PTR(-ENOMEM); + } + + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = + cpu_to_le32( NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_RIGHTS_REMOTE_READ | + NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_VA_TO | + NES_CQP_STAG_REM_ACC_EN); + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + + /* Wait for CQP */ + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X.\n", + stag, ret, cqp_request->major_code, cqp_request->minor_code); + if ((!ret) || (cqp_request->major_code)) { + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + kfree(nesmr); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + if (!ret) { + return ERR_PTR(-ETIME); + } else { + return ERR_PTR(-ENOMEM); + } + } else { + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + } + + nesmr->ibmw.rkey = stag; + nesmr->mode = IWNES_MEMREG_TYPE_MW; + ibmw = &nesmr->ibmw; + nesmr->pbl_4k = 0; + nesmr->pbls_used = 0; + + return ibmw; +} + + +/** + * nes_dealloc_mw + */ +static int nes_dealloc_mw(struct ib_mw *ibmw) +{ + struct nes_mr *nesmr = to_nesmw(ibmw); + struct nes_vnic *nesvnic = to_nesvnic(ibmw->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + int err = 0; + unsigned long flags; + int ret; + + /* Deallocate the window with the adapter */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, NES_CQP_DEALLOCATE_STAG); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + + /* Wait for CQP */ + nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n", + ibmw->rkey); + ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X.\n", + ret, cqp_request->major_code, cqp_request->minor_code); + if ((!ret) || (cqp_request->major_code)) { + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + if (!ret) { + err = -ETIME; + } else { + err = -EIO; + } + } else { + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + } + + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + (ibmw->rkey & 0x0fffff00) >> 8); + kfree(nesmr); + + return err; +} + + +/** + * nes_bind_mw + */ +static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, + struct ib_mw_bind *ibmw_bind) +{ + u64 u64temp; + struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); + struct nes_device *nesdev = nesvnic->nesdev; + /* struct nes_mr *nesmr = to_nesmw(ibmw); */ + struct nes_qp *nesqp = to_nesqp(ibqp); + struct nes_hw_qp_wqe *wqe; + unsigned long flags = 0; + u32 head; + u32 wqe_misc = 0; + u32 qsize; + + if (nesqp->ibqp_state > IB_QPS_RTS) + return -EINVAL; + + spin_lock_irqsave(&nesqp->lock, flags); + + head = nesqp->hwqp.sq_head; + qsize = nesqp->hwqp.sq_tail; + + /* Check for SQ overflow */ + if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { + spin_unlock_irqrestore(&nesqp->lock, flags); + return -EINVAL; + } + + wqe = &nesqp->hwqp.sq_vbase[head]; + /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */ + nes_fill_init_qp_wqe(wqe, nesqp, head); + u64temp = ibmw_bind->wr_id; + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp); + wqe_misc = NES_IWARP_SQ_OP_BIND; + + wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; + + if (ibmw_bind->send_flags & IB_SEND_SIGNALED) + wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL; + + if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_WRITE) { + wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE; + } + if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_READ) { + wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ; + } + + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, ibmw_bind->mr->lkey); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX, + ibmw_bind->length); + wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0; + u64temp = (u64)ibmw_bind->addr; + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp); + + head++; + if (head >= qsize) + head = 0; + + nesqp->hwqp.sq_head = head; + barrier(); + + nes_write32(nesdev->regs+NES_WQE_ALLOC, + (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); + + spin_unlock_irqrestore(&nesqp->lock, flags); + + return 0; +} + + +/** + * nes_alloc_fmr + */ +static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, + int ibmr_access_flags, + struct ib_fmr_attr *ibfmr_attr) +{ + unsigned long flags; + struct nes_pd *nespd = to_nespd(ibpd); + struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_fmr *nesfmr; + struct nes_cqp_request *cqp_request; + struct nes_hw_cqp_wqe *cqp_wqe; + int ret; + u32 stag; + u32 stag_index = 0; + u32 next_stag_index = 0; + u32 driver_key = 0; + u32 opcode = 0; + u8 stag_key = 0; + int i=0; + struct nes_vpbl vpbl; + + get_random_bytes(&next_stag_index, sizeof(next_stag_index)); + stag_key = (u8)next_stag_index; + + driver_key = 0; + + next_stag_index >>= 8; + next_stag_index %= nesadapter->max_mr; + + ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, + nesadapter->max_mr, &stag_index, &next_stag_index); + if (ret) { + goto failed_resource_alloc; + } + + nesfmr = kzalloc(sizeof(*nesfmr), GFP_KERNEL); + if (!nesfmr) { + ret = -ENOMEM; + goto failed_fmr_alloc; + } + + nesfmr->nesmr.mode = IWNES_MEMREG_TYPE_FMR; + if (ibfmr_attr->max_pages == 1) { + /* use zero length PBL */ + nesfmr->nesmr.pbl_4k = 0; + nesfmr->nesmr.pbls_used = 0; + } else if (ibfmr_attr->max_pages <= 32) { + /* use PBL 256 */ + nesfmr->nesmr.pbl_4k = 0; + nesfmr->nesmr.pbls_used = 1; + } else if (ibfmr_attr->max_pages <= 512) { + /* use 4K PBLs */ + nesfmr->nesmr.pbl_4k = 1; + nesfmr->nesmr.pbls_used = 1; + } else { + /* use two level 4K PBLs */ + /* add support for two level 256B PBLs */ + nesfmr->nesmr.pbl_4k = 1; + nesfmr->nesmr.pbls_used = 1 + (ibfmr_attr->max_pages >> 9) + + ((ibfmr_attr->max_pages & 511) ? 1 : 0); + } + /* Register the region with the adapter */ + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + + /* track PBL resources */ + if (nesfmr->nesmr.pbls_used != 0) { + if (nesfmr->nesmr.pbl_4k) { + if (nesfmr->nesmr.pbls_used > nesadapter->free_4kpbl) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + ret = -ENOMEM; + goto failed_vpbl_alloc; + } else { + nesadapter->free_4kpbl -= nesfmr->nesmr.pbls_used; + } + } else { + if (nesfmr->nesmr.pbls_used > nesadapter->free_256pbl) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + ret = -ENOMEM; + goto failed_vpbl_alloc; + } else { + nesadapter->free_256pbl -= nesfmr->nesmr.pbls_used; + } + } + } + + /* one level pbl */ + if (nesfmr->nesmr.pbls_used == 0) { + nesfmr->root_vpbl.pbl_vbase = NULL; + nes_debug(NES_DBG_MR, "zero level pbl \n"); + } else if (nesfmr->nesmr.pbls_used == 1) { + /* can change it to kmalloc & dma_map_single */ + nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, + &nesfmr->root_vpbl.pbl_pbase); + if (!nesfmr->root_vpbl.pbl_vbase) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + ret = -ENOMEM; + goto failed_vpbl_alloc; + } + nesfmr->leaf_pbl_cnt = 0; + nes_debug(NES_DBG_MR, "one level pbl, root_vpbl.pbl_vbase=%p \n", + nesfmr->root_vpbl.pbl_vbase); + } + /* two level pbl */ + else { + nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192, + &nesfmr->root_vpbl.pbl_pbase); + if (!nesfmr->root_vpbl.pbl_vbase) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + ret = -ENOMEM; + goto failed_vpbl_alloc; + } + + nesfmr->root_vpbl.leaf_vpbl = kzalloc(sizeof(*nesfmr->root_vpbl.leaf_vpbl)*1024, GFP_KERNEL); + if (!nesfmr->root_vpbl.leaf_vpbl) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + ret = -ENOMEM; + goto failed_leaf_vpbl_alloc; + } + + nesfmr->leaf_pbl_cnt = nesfmr->nesmr.pbls_used-1; + nes_debug(NES_DBG_MR, "two level pbl, root_vpbl.pbl_vbase=%p" + " leaf_pbl_cnt=%d root_vpbl.leaf_vpbl=%p\n", + nesfmr->root_vpbl.pbl_vbase, nesfmr->leaf_pbl_cnt, nesfmr->root_vpbl.leaf_vpbl); + + for (i=0; i<nesfmr->leaf_pbl_cnt; i++) + nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase = NULL; + + for (i=0; i<nesfmr->leaf_pbl_cnt; i++) { + vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, + &vpbl.pbl_pbase); + + if (!vpbl.pbl_vbase) { + ret = -ENOMEM; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + goto failed_leaf_vpbl_pages_alloc; + } + + nesfmr->root_vpbl.pbl_vbase[i].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase); + nesfmr->root_vpbl.pbl_vbase[i].pa_high = cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); + nesfmr->root_vpbl.leaf_vpbl[i] = vpbl; + + nes_debug(NES_DBG_MR, "pbase_low=0x%x, pbase_high=0x%x, vpbl=%p\n", + nesfmr->root_vpbl.pbl_vbase[i].pa_low, + nesfmr->root_vpbl.pbl_vbase[i].pa_high, + &nesfmr->root_vpbl.leaf_vpbl[i]); + } + } + nesfmr->ib_qp = NULL; + nesfmr->access_rights =0; + + stag = stag_index << 8; + stag |= driver_key; + stag += (u32)stag_key; + + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); + ret = -ENOMEM; + goto failed_leaf_vpbl_pages_alloc; + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n", + stag, stag_index); + + opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR; + + if (nesfmr->nesmr.pbl_4k == 1) + opcode |= NES_CQP_STAG_PBL_BLK_SIZE; + + if (ibmr_access_flags & IB_ACCESS_REMOTE_WRITE) { + opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE | + NES_CQP_STAG_RIGHTS_LOCAL_WRITE | NES_CQP_STAG_REM_ACC_EN; + nesfmr->access_rights |= + NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_RIGHTS_LOCAL_WRITE | + NES_CQP_STAG_REM_ACC_EN; + } + + if (ibmr_access_flags & IB_ACCESS_REMOTE_READ) { + opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ | + NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_REM_ACC_EN; + nesfmr->access_rights |= + NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_RIGHTS_LOCAL_READ | + NES_CQP_STAG_REM_ACC_EN; + } + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); + + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = + cpu_to_le32((nesfmr->nesmr.pbls_used>1) ? + (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + + /* Wait for CQP */ + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X.\n", + stag, ret, cqp_request->major_code, cqp_request->minor_code); + + if ((!ret) || (cqp_request->major_code)) { + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + ret = (!ret) ? -ETIME : -EIO; + goto failed_leaf_vpbl_pages_alloc; + } else { + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + } + + nesfmr->nesmr.ibfmr.lkey = stag; + nesfmr->nesmr.ibfmr.rkey = stag; + nesfmr->attr = *ibfmr_attr; + + return &nesfmr->nesmr.ibfmr; + + failed_leaf_vpbl_pages_alloc: + /* unroll all allocated pages */ + for (i=0; i<nesfmr->leaf_pbl_cnt; i++) { + if (nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase) { + pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase, + nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase); + } + } + if (nesfmr->root_vpbl.leaf_vpbl) + kfree(nesfmr->root_vpbl.leaf_vpbl); + + failed_leaf_vpbl_alloc: + if (nesfmr->leaf_pbl_cnt == 0) { + if (nesfmr->root_vpbl.pbl_vbase) + pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase, + nesfmr->root_vpbl.pbl_pbase); + } else + pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase, + nesfmr->root_vpbl.pbl_pbase); + + failed_vpbl_alloc: + kfree(nesfmr); + + failed_fmr_alloc: + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + + failed_resource_alloc: + return ERR_PTR(ret); +} + + +/** + * nes_dealloc_fmr + */ +static int nes_dealloc_fmr(struct ib_fmr *ibfmr) +{ + struct nes_mr *nesmr = to_nesmr_from_ibfmr(ibfmr); + struct nes_fmr *nesfmr = to_nesfmr(nesmr); + struct nes_vnic *nesvnic = to_nesvnic(ibfmr->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_mr temp_nesmr = *nesmr; + int i = 0; + + temp_nesmr.ibmw.device = ibfmr->device; + temp_nesmr.ibmw.pd = ibfmr->pd; + temp_nesmr.ibmw.rkey = ibfmr->rkey; + temp_nesmr.ibmw.uobject = NULL; + + /* free the resources */ + if (nesfmr->leaf_pbl_cnt == 0) { + /* single PBL case */ + if (nesfmr->root_vpbl.pbl_vbase) + pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase, + nesfmr->root_vpbl.pbl_pbase); + } else { + for (i = 0; i < nesfmr->leaf_pbl_cnt; i++) { + pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase, + nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase); + } + kfree(nesfmr->root_vpbl.leaf_vpbl); + pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase, + nesfmr->root_vpbl.pbl_pbase); + } + + return nes_dealloc_mw(&temp_nesmr.ibmw); +} + + +/** + * nes_map_phys_fmr + */ +static int nes_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova) +{ + return 0; +} + + +/** + * nes_unmap_frm + */ +static int nes_unmap_fmr(struct list_head *ibfmr_list) +{ + return 0; +} + + + +/** + * nes_query_device + */ +static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +{ + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_ib_device *nesibdev = nesvnic->nesibdev; + + memset(props, 0, sizeof(*props)); + memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6); + + props->fw_ver = nesdev->nesadapter->fw_ver; + props->device_cap_flags = nesdev->nesadapter->device_cap_flags; + props->vendor_id = nesdev->nesadapter->vendor_id; + props->vendor_part_id = nesdev->nesadapter->vendor_part_id; + props->hw_ver = nesdev->nesadapter->hw_rev; + props->max_mr_size = 0x80000000; + props->max_qp = nesibdev->max_qp; + props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2; + props->max_sge = nesdev->nesadapter->max_sge; + props->max_cq = nesibdev->max_cq; + props->max_cqe = nesdev->nesadapter->max_cqe - 1; + props->max_mr = nesibdev->max_mr; + props->max_mw = nesibdev->max_mr; + props->max_pd = nesibdev->max_pd; + props->max_sge_rd = 1; + switch (nesdev->nesadapter->max_irrq_wr) { + case 0: + props->max_qp_rd_atom = 1; + break; + case 1: + props->max_qp_rd_atom = 4; + break; + case 2: + props->max_qp_rd_atom = 16; + break; + case 3: + props->max_qp_rd_atom = 32; + break; + default: + props->max_qp_rd_atom = 0; + } + props->max_qp_init_rd_atom = props->max_qp_wr; + props->atomic_cap = IB_ATOMIC_NONE; + props->max_map_per_fmr = 1; + + return 0; +} + + +/** + * nes_query_port + */ +static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) +{ + memset(props, 0, sizeof(*props)); + + props->max_mtu = IB_MTU_2048; + props->active_mtu = IB_MTU_2048; + props->lid = 1; + props->lmc = 0; + props->sm_lid = 0; + props->sm_sl = 0; + props->state = IB_PORT_ACTIVE; + props->phys_state = 0; + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | + IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->qkey_viol_cntr = 0; + props->active_width = IB_WIDTH_4X; + props->active_speed = 1; + props->max_msg_sz = 0x80000000; + + return 0; +} + + +/** + * nes_modify_port + */ +static int nes_modify_port(struct ib_device *ibdev, u8 port, + int port_modify_mask, struct ib_port_modify *props) +{ + return 0; +} + + +/** + * nes_query_pkey + */ +static int nes_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + *pkey = 0; + return 0; +} + + +/** + * nes_query_gid + */ +static int nes_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + memcpy(&(gid->raw[0]), nesvnic->netdev->dev_addr, 6); + + return 0; +} + + +/** + * nes_alloc_ucontext - Allocate the user context data structure. This keeps track + * of all objects associated with a particular user-mode client. + */ +static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_alloc_ucontext_req req; + struct nes_alloc_ucontext_resp uresp; + struct nes_ucontext *nes_ucontext; + struct nes_ib_device *nesibdev = nesvnic->nesibdev; + + + if (ib_copy_from_udata(&req, udata, sizeof(struct nes_alloc_ucontext_req))) { + printk(KERN_ERR PFX "Invalid structure size on allocate user context.\n"); + return ERR_PTR(-EINVAL); + } + + if (req.userspace_ver != NES_ABI_USERSPACE_VER) { + printk(KERN_ERR PFX "Invalid userspace driver version detected. Detected version %d, should be %d\n", + req.userspace_ver, NES_ABI_USERSPACE_VER); + return ERR_PTR(-EINVAL); + } + + + memset(&uresp, 0, sizeof uresp); + + uresp.max_qps = nesibdev->max_qp; + uresp.max_pds = nesibdev->max_pd; + uresp.wq_size = nesdev->nesadapter->max_qp_wr * 2; + uresp.virtwq = nesadapter->virtwq; + uresp.kernel_ver = NES_ABI_KERNEL_VER; + + nes_ucontext = kzalloc(sizeof *nes_ucontext, GFP_KERNEL); + if (!nes_ucontext) + return ERR_PTR(-ENOMEM); + + nes_ucontext->nesdev = nesdev; + nes_ucontext->mmap_wq_offset = uresp.max_pds; + nes_ucontext->mmap_cq_offset = nes_ucontext->mmap_wq_offset + + ((sizeof(struct nes_hw_qp_wqe) * uresp.max_qps * 2) + PAGE_SIZE-1) / + PAGE_SIZE; + + + if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { + kfree(nes_ucontext); + return ERR_PTR(-EFAULT); + } + + INIT_LIST_HEAD(&nes_ucontext->cq_reg_mem_list); + INIT_LIST_HEAD(&nes_ucontext->qp_reg_mem_list); + atomic_set(&nes_ucontext->usecnt, 1); + return &nes_ucontext->ibucontext; +} + + +/** + * nes_dealloc_ucontext + */ +static int nes_dealloc_ucontext(struct ib_ucontext *context) +{ + /* struct nes_vnic *nesvnic = to_nesvnic(context->device); */ + /* struct nes_device *nesdev = nesvnic->nesdev; */ + struct nes_ucontext *nes_ucontext = to_nesucontext(context); + + if (!atomic_dec_and_test(&nes_ucontext->usecnt)) + return 0; + kfree(nes_ucontext); + return 0; +} + + +/** + * nes_mmap + */ +static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + unsigned long index; + struct nes_vnic *nesvnic = to_nesvnic(context->device); + struct nes_device *nesdev = nesvnic->nesdev; + /* struct nes_adapter *nesadapter = nesdev->nesadapter; */ + struct nes_ucontext *nes_ucontext; + struct nes_qp *nesqp; + + nes_ucontext = to_nesucontext(context); + + + if (vma->vm_pgoff >= nes_ucontext->mmap_wq_offset) { + index = (vma->vm_pgoff - nes_ucontext->mmap_wq_offset) * PAGE_SIZE; + index /= ((sizeof(struct nes_hw_qp_wqe) * nesdev->nesadapter->max_qp_wr * 2) + + PAGE_SIZE-1) & (~(PAGE_SIZE-1)); + if (!test_bit(index, nes_ucontext->allocated_wqs)) { + nes_debug(NES_DBG_MMAP, "wq %lu not allocated\n", index); + return -EFAULT; + } + nesqp = nes_ucontext->mmap_nesqp[index]; + if (nesqp == NULL) { + nes_debug(NES_DBG_MMAP, "wq %lu has a NULL QP base.\n", index); + return -EFAULT; + } + if (remap_pfn_range(vma, vma->vm_start, + virt_to_phys(nesqp->hwqp.sq_vbase) >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot)) { + nes_debug(NES_DBG_MMAP, "remap_pfn_range failed.\n"); + return -EAGAIN; + } + vma->vm_private_data = nesqp; + return 0; + } else { + index = vma->vm_pgoff; + if (!test_bit(index, nes_ucontext->allocated_doorbells)) + return -EFAULT; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, + (nesdev->doorbell_start + + ((nes_ucontext->mmap_db_index[index] - nesdev->base_doorbell_index) * 4096)) + >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + vma->vm_private_data = nes_ucontext; + return 0; + } + + return -ENOSYS; +} + + +/** + * nes_alloc_pd + */ +static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, struct ib_udata *udata) +{ + struct nes_pd *nespd; + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_ucontext *nesucontext; + struct nes_alloc_pd_resp uresp; + u32 pd_num = 0; + int err; + + nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n", + nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context, + atomic_read(&nesvnic->netdev->refcnt)); + + err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, + nesadapter->max_pd, &pd_num, &nesadapter->next_pd); + if (err) { + return ERR_PTR(err); + } + + nespd = kzalloc(sizeof (struct nes_pd), GFP_KERNEL); + if (!nespd) { + nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); + return ERR_PTR(-ENOMEM); + } + + nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n", + nespd, nesvnic->nesibdev->ibdev.name); + + nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd; + + if (context) { + nesucontext = to_nesucontext(context); + nespd->mmap_db_index = find_next_zero_bit(nesucontext->allocated_doorbells, + NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db); + nes_debug(NES_DBG_PD, "find_first_zero_biton doorbells returned %u, mapping pd_id %u.\n", + nespd->mmap_db_index, nespd->pd_id); + if (nespd->mmap_db_index > NES_MAX_USER_DB_REGIONS) { + nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n"); + nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); + kfree(nespd); + return ERR_PTR(-ENOMEM); + } + + uresp.pd_id = nespd->pd_id; + uresp.mmap_db_index = nespd->mmap_db_index; + if (ib_copy_to_udata(udata, &uresp, sizeof (struct nes_alloc_pd_resp))) { + nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); + kfree(nespd); + return ERR_PTR(-EFAULT); + } + + set_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells); + nesucontext->mmap_db_index[nespd->mmap_db_index] = nespd->pd_id; + nesucontext->first_free_db = nespd->mmap_db_index + 1; + } + + nes_debug(NES_DBG_PD, "PD%u structure located @%p.\n", nespd->pd_id, nespd); + return &nespd->ibpd; +} + + +/** + * nes_dealloc_pd + */ +static int nes_dealloc_pd(struct ib_pd *ibpd) +{ + struct nes_ucontext *nesucontext; + struct nes_pd *nespd = to_nespd(ibpd); + struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + + if ((ibpd->uobject) && (ibpd->uobject->context)) { + nesucontext = to_nesucontext(ibpd->uobject->context); + nes_debug(NES_DBG_PD, "Clearing bit %u from allocated doorbells\n", + nespd->mmap_db_index); + clear_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells); + nesucontext->mmap_db_index[nespd->mmap_db_index] = 0; + if (nesucontext->first_free_db > nespd->mmap_db_index) { + nesucontext->first_free_db = nespd->mmap_db_index; + } + } + + nes_debug(NES_DBG_PD, "Deallocating PD%u structure located @%p.\n", + nespd->pd_id, nespd); + nes_free_resource(nesadapter, nesadapter->allocated_pds, + (nespd->pd_id-nesadapter->base_pd)>>(PAGE_SHIFT-12)); + kfree(nespd); + + return 0; +} + + +/** + * nes_create_ah + */ +static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + return ERR_PTR(-ENOSYS); +} + + +/** + * nes_destroy_ah + */ +static int nes_destroy_ah(struct ib_ah *ah) +{ + return -ENOSYS; +} + + +/** + * nes_get_encoded_size + */ +static inline u8 nes_get_encoded_size(int *size) +{ + u8 encoded_size = 0; + if (*size <= 32) { + *size = 32; + encoded_size = 1; + } else if (*size <= 128) { + *size = 128; + encoded_size = 2; + } else if (*size <= 512) { + *size = 512; + encoded_size = 3; + } + return (encoded_size); +} + + + +/** + * nes_setup_virt_qp + */ +static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl, + struct nes_vnic *nesvnic, int sq_size, int rq_size) +{ + unsigned long flags; + void *mem; + __le64 *pbl = NULL; + __le64 *tpbl; + __le64 *pblbuffer; + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 pbl_entries; + u8 rq_pbl_entries; + u8 sq_pbl_entries; + + pbl_entries = nespbl->pbl_size >> 3; + nes_debug(NES_DBG_QP, "Userspace PBL, pbl_size=%u, pbl_entries = %d pbl_vbase=%p, pbl_pbase=%p\n", + nespbl->pbl_size, pbl_entries, + (void *)nespbl->pbl_vbase, + (void *)nespbl->pbl_pbase); + pbl = (__le64 *) nespbl->pbl_vbase; /* points to first pbl entry */ + /* now lets set the sq_vbase as well as rq_vbase addrs we will assign */ + /* the first pbl to be fro the rq_vbase... */ + rq_pbl_entries = (rq_size * sizeof(struct nes_hw_qp_wqe)) >> 12; + sq_pbl_entries = (sq_size * sizeof(struct nes_hw_qp_wqe)) >> 12; + nesqp->hwqp.sq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32); + if (!nespbl->page) { + nes_debug(NES_DBG_QP, "QP nespbl->page is NULL \n"); + kfree(nespbl); + return -ENOMEM; + } + + nesqp->hwqp.sq_vbase = kmap(nespbl->page); + nesqp->page = nespbl->page; + if (!nesqp->hwqp.sq_vbase) { + nes_debug(NES_DBG_QP, "QP sq_vbase kmap failed\n"); + kfree(nespbl); + return -ENOMEM; + } + + /* Now to get to sq.. we need to calculate how many */ + /* PBL entries were used by the rq.. */ + pbl += sq_pbl_entries; + nesqp->hwqp.rq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32); + /* nesqp->hwqp.rq_vbase = bus_to_virt(*pbl); */ + /*nesqp->hwqp.rq_vbase = phys_to_virt(*pbl); */ + + nes_debug(NES_DBG_QP, "QP sq_vbase= %p sq_pbase=%p rq_vbase=%p rq_pbase=%p\n", + nesqp->hwqp.sq_vbase, (void *)nesqp->hwqp.sq_pbase, + nesqp->hwqp.rq_vbase, (void *)nesqp->hwqp.rq_pbase); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (!nesadapter->free_256pbl) { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kunmap(nesqp->page); + kfree(nespbl); + return -ENOMEM; + } + nesadapter->free_256pbl--; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + + nesqp->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 256, &nesqp->pbl_pbase); + pblbuffer = nesqp->pbl_vbase; + if (!nesqp->pbl_vbase) { + /* memory allocated during nes_reg_user_mr() */ + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kunmap(nesqp->page); + return -ENOMEM; + } + memset(nesqp->pbl_vbase, 0, 256); + /* fill in the page address in the pbl buffer.. */ + tpbl = pblbuffer + 16; + pbl = (__le64 *)nespbl->pbl_vbase; + while (sq_pbl_entries--) + *tpbl++ = *pbl++; + tpbl = pblbuffer; + while (rq_pbl_entries--) + *tpbl++ = *pbl++; + + /* done with memory allocated during nes_reg_user_mr() */ + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + + nesqp->qp_mem_size = + max((u32)sizeof(struct nes_qp_context), ((u32)256)) + 256; /* this is Q2 */ + /* Round up to a multiple of a page */ + nesqp->qp_mem_size += PAGE_SIZE - 1; + nesqp->qp_mem_size &= ~(PAGE_SIZE - 1); + + mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size, + &nesqp->hwqp.q2_pbase); + + if (!mem) { + pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase); + nesqp->pbl_vbase = NULL; + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kunmap(nesqp->page); + return -ENOMEM; + } + nesqp->hwqp.q2_vbase = mem; + mem += 256; + memset(nesqp->hwqp.q2_vbase, 0, 256); + nesqp->nesqp_context = mem; + memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context)); + nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256; + + return 0; +} + + +/** + * nes_setup_mmap_qp + */ +static int nes_setup_mmap_qp(struct nes_qp *nesqp, struct nes_vnic *nesvnic, + int sq_size, int rq_size) +{ + void *mem; + struct nes_device *nesdev = nesvnic->nesdev; + + nesqp->qp_mem_size = (sizeof(struct nes_hw_qp_wqe) * sq_size) + + (sizeof(struct nes_hw_qp_wqe) * rq_size) + + max((u32)sizeof(struct nes_qp_context), ((u32)256)) + + 256; /* this is Q2 */ + /* Round up to a multiple of a page */ + nesqp->qp_mem_size += PAGE_SIZE - 1; + nesqp->qp_mem_size &= ~(PAGE_SIZE - 1); + + mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size, + &nesqp->hwqp.sq_pbase); + if (!mem) + return -ENOMEM; + nes_debug(NES_DBG_QP, "PCI consistent memory for " + "host descriptor rings located @ %p (pa = 0x%08lX.) size = %u.\n", + mem, (unsigned long)nesqp->hwqp.sq_pbase, nesqp->qp_mem_size); + + memset(mem, 0, nesqp->qp_mem_size); + + nesqp->hwqp.sq_vbase = mem; + mem += sizeof(struct nes_hw_qp_wqe) * sq_size; + + nesqp->hwqp.rq_vbase = mem; + nesqp->hwqp.rq_pbase = nesqp->hwqp.sq_pbase + + sizeof(struct nes_hw_qp_wqe) * sq_size; + mem += sizeof(struct nes_hw_qp_wqe) * rq_size; + + nesqp->hwqp.q2_vbase = mem; + nesqp->hwqp.q2_pbase = nesqp->hwqp.rq_pbase + + sizeof(struct nes_hw_qp_wqe) * rq_size; + mem += 256; + memset(nesqp->hwqp.q2_vbase, 0, 256); + + nesqp->nesqp_context = mem; + nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256; + memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context)); + return 0; +} + + +/** + * nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory. + */ +static inline void nes_free_qp_mem(struct nes_device *nesdev, + struct nes_qp *nesqp, int virt_wqs) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + if (!virt_wqs) { + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, + nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase); + }else { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); + pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase ); + nesqp->pbl_vbase = NULL; + kunmap(nesqp->page); + } +} + + +/** + * nes_create_qp + */ +static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, struct ib_udata *udata) +{ + u64 u64temp= 0; + u64 u64nesqp = 0; + struct nes_pd *nespd = to_nespd(ibpd); + struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_qp *nesqp; + struct nes_cq *nescq; + struct nes_ucontext *nes_ucontext; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + struct nes_create_qp_req req; + struct nes_create_qp_resp uresp; + struct nes_pbl *nespbl = NULL; + u32 qp_num = 0; + u32 opcode = 0; + /* u32 counter = 0; */ + void *mem; + unsigned long flags; + int ret; + int err; + int virt_wqs = 0; + int sq_size; + int rq_size; + u8 sq_encoded_size; + u8 rq_encoded_size; + /* int counter; */ + + atomic_inc(&qps_created); + switch (init_attr->qp_type) { + case IB_QPT_RC: + if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) { + init_attr->cap.max_inline_data = 0; + } else { + init_attr->cap.max_inline_data = 64; + } + sq_size = init_attr->cap.max_send_wr; + rq_size = init_attr->cap.max_recv_wr; + + // check if the encoded sizes are OK or not... + sq_encoded_size = nes_get_encoded_size(&sq_size); + rq_encoded_size = nes_get_encoded_size(&rq_size); + + if ((!sq_encoded_size) || (!rq_encoded_size)) { + nes_debug(NES_DBG_QP, "ERROR bad rq (%u) or sq (%u) size\n", + rq_size, sq_size); + return ERR_PTR(-EINVAL); + } + + init_attr->cap.max_send_wr = sq_size -2; + init_attr->cap.max_recv_wr = rq_size -1; + nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size); + + ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps, + nesadapter->max_qp, &qp_num, &nesadapter->next_qp); + if (ret) { + return ERR_PTR(ret); + } + + /* Need 512 (actually now 1024) byte alignment on this structure */ + mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL); + if (!mem) { + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + nes_debug(NES_DBG_QP, "Unable to allocate QP\n"); + return ERR_PTR(-ENOMEM); + } + u64nesqp = (unsigned long)mem; + u64nesqp += ((u64)NES_SW_CONTEXT_ALIGN) - 1; + u64temp = ((u64)NES_SW_CONTEXT_ALIGN) - 1; + u64nesqp &= ~u64temp; + nesqp = (struct nes_qp *)(unsigned long)u64nesqp; + /* nes_debug(NES_DBG_QP, "nesqp=%p, allocated buffer=%p. Rounded to closest %u\n", + nesqp, mem, NES_SW_CONTEXT_ALIGN); */ + nesqp->allocated_buffer = mem; + + if (udata) { + if (ib_copy_from_udata(&req, udata, sizeof(struct nes_create_qp_req))) { + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + kfree(nesqp->allocated_buffer); + nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n"); + return NULL; + } + if (req.user_wqe_buffers) { + virt_wqs = 1; + } + if ((ibpd->uobject) && (ibpd->uobject->context)) { + nesqp->user_mode = 1; + nes_ucontext = to_nesucontext(ibpd->uobject->context); + if (virt_wqs) { + err = 1; + list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) { + if (nespbl->user_base == (unsigned long )req.user_wqe_buffers) { + list_del(&nespbl->list); + err = 0; + nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. user_base=0x%lx\n", + nespbl, nespbl->user_base); + break; + } + } + if (err) { + nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. address = %llx.\n", + (long long unsigned int)req.user_wqe_buffers); + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + kfree(nesqp->allocated_buffer); + return ERR_PTR(-ENOMEM); + } + } + + nes_ucontext = to_nesucontext(ibpd->uobject->context); + nesqp->mmap_sq_db_index = + find_next_zero_bit(nes_ucontext->allocated_wqs, + NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); + /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n", + nespd->mmap_db_index); */ + if (nesqp->mmap_sq_db_index > NES_MAX_USER_WQ_REGIONS) { + nes_debug(NES_DBG_QP, + "db index > max user regions, failing create QP\n"); + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + if (virt_wqs) { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + } + kfree(nesqp->allocated_buffer); + return ERR_PTR(-ENOMEM); + } + set_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); + nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp; + nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index + 1; + } else { + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + kfree(nesqp->allocated_buffer); + return ERR_PTR(-EFAULT); + } + } + err = (!virt_wqs) ? nes_setup_mmap_qp(nesqp, nesvnic, sq_size, rq_size) : + nes_setup_virt_qp(nesqp, nespbl, nesvnic, sq_size, rq_size); + if (err) { + nes_debug(NES_DBG_QP, + "error geting qp mem code = %d\n", err); + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + kfree(nesqp->allocated_buffer); + return ERR_PTR(-ENOMEM); + } + + nesqp->hwqp.sq_size = sq_size; + nesqp->hwqp.sq_encoded_size = sq_encoded_size; + nesqp->hwqp.sq_head = 1; + nesqp->hwqp.rq_size = rq_size; + nesqp->hwqp.rq_encoded_size = rq_encoded_size; + /* nes_debug(NES_DBG_QP, "nesqp->nesqp_context_pbase = %p\n", + (void *)nesqp->nesqp_context_pbase); + */ + nesqp->hwqp.qp_id = qp_num; + nesqp->ibqp.qp_num = nesqp->hwqp.qp_id; + nesqp->nespd = nespd; + + nescq = to_nescq(init_attr->send_cq); + nesqp->nesscq = nescq; + nescq = to_nescq(init_attr->recv_cq); + nesqp->nesrcq = nescq; + + nesqp->nesqp_context->misc |= cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC_PCI_FCN_SHIFT); + nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.rq_encoded_size << + NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT); + nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.sq_encoded_size << + NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT); + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_PRIV_EN); + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_FAST_REGISTER_EN); + nesqp->nesqp_context->cqs = cpu_to_le32(nesqp->nesscq->hw_cq.cq_number + + ((u32)nesqp->nesrcq->hw_cq.cq_number << 16)); + u64temp = (u64)nesqp->hwqp.sq_pbase; + nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + + + if (!virt_wqs) { + u64temp = (u64)nesqp->hwqp.sq_pbase; + nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + u64temp = (u64)nesqp->hwqp.rq_pbase; + nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + } else { + u64temp = (u64)nesqp->pbl_pbase; + nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + } + + /* nes_debug(NES_DBG_QP, "next_qp_nic_index=%u, using nic_index=%d\n", + nesvnic->next_qp_nic_index, + nesvnic->qp_nic_index[nesvnic->next_qp_nic_index]); */ + spin_lock_irqsave(&nesdev->cqp.lock, flags); + nesqp->nesqp_context->misc2 |= cpu_to_le32( + (u32)nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] << + NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT); + nesvnic->next_qp_nic_index++; + if ((nesvnic->next_qp_nic_index > 3) || + (nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] == 0xf)) { + nesvnic->next_qp_nic_index = 0; + } + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + + nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32((u32)nesqp->nespd->pd_id << 16); + u64temp = (u64)nesqp->hwqp.q2_pbase; + nesqp->nesqp_context->q2_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->q2_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + nesqp->nesqp_context->aeq_token_low = cpu_to_le32((u32)((unsigned long)(nesqp))); + nesqp->nesqp_context->aeq_token_high = cpu_to_le32((u32)(upper_32_bits((unsigned long)(nesqp)))); + nesqp->nesqp_context->ird_ord_sizes = cpu_to_le32(NES_QPCONTEXT_ORDIRD_ALSMM | + ((((u32)nesadapter->max_irrq_wr) << + NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT) & NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK)); + if (disable_mpa_crc) { + nes_debug(NES_DBG_QP, "Disabling MPA crc checking due to module option.\n"); + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(NES_QPCONTEXT_ORDIRD_RNMC); + } + + + /* Create the QP */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_QP, "Failed to get a cqp_request\n"); + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + nes_free_qp_mem(nesdev, nesqp,virt_wqs); + kfree(nesqp->allocated_buffer); + return ERR_PTR(-ENOMEM); + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + if (!virt_wqs) { + opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | + NES_CQP_QP_IWARP_STATE_IDLE; + } else { + opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_VIRT_WQS | + NES_CQP_QP_IWARP_STATE_IDLE; + } + opcode |= NES_CQP_QP_CQS_VALID; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + + u64temp = (u64)nesqp->nesqp_context_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + + /* Wait for CQP */ + nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n", + nesqp->hwqp.qp_id); + ret = wait_event_timeout(cqp_request->waitq, + (cqp_request->request_done != 0), NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_QP, "Create iwarp QP%u completed, wait_event_timeout ret=%u," + " nesdev->cqp_head = %u, nesdev->cqp.sq_tail = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X.\n", + nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail, + cqp_request->major_code, cqp_request->minor_code); + if ((!ret) || (cqp_request->major_code)) { + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + nes_free_qp_mem(nesdev, nesqp,virt_wqs); + kfree(nesqp->allocated_buffer); + if (!ret) { + return ERR_PTR(-ETIME); + } else { + return ERR_PTR(-EIO); + } + } else { + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + } + + if (ibpd->uobject) { + uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; + uresp.actual_sq_size = sq_size; + uresp.actual_rq_size = rq_size; + uresp.qp_id = nesqp->hwqp.qp_id; + uresp.nes_drv_opt = nes_drv_opt; + if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + nes_free_qp_mem(nesdev, nesqp,virt_wqs); + kfree(nesqp->allocated_buffer); + return ERR_PTR(-EFAULT); + } + } + + nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n", + nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp)); + spin_lock_init(&nesqp->lock); + init_waitqueue_head(&nesqp->state_waitq); + init_waitqueue_head(&nesqp->kick_waitq); + nes_add_ref(&nesqp->ibqp); + break; + default: + nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type); + return ERR_PTR(-EINVAL); + break; + } + + /* update the QP table */ + nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp; + nes_debug(NES_DBG_QP, "netdev refcnt=%u\n", + atomic_read(&nesvnic->netdev->refcnt)); + + return &nesqp->ibqp; +} + + +/** + * nes_destroy_qp + */ +static int nes_destroy_qp(struct ib_qp *ibqp) +{ + struct nes_qp *nesqp = to_nesqp(ibqp); + /* struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); */ + struct nes_ucontext *nes_ucontext; + struct ib_qp_attr attr; + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + int ret; + + atomic_inc(&sw_qps_destroyed); + nesqp->destroyed = 1; + + /* Blow away the connection if it exists. */ + if (nesqp->ibqp_state >= IB_QPS_INIT && nesqp->ibqp_state <= IB_QPS_RTS) { + /* if (nesqp->ibqp_state == IB_QPS_RTS) { */ + attr.qp_state = IB_QPS_ERR; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); + } + + if (((nesqp->ibqp_state == IB_QPS_INIT) || + (nesqp->ibqp_state == IB_QPS_RTR)) && (nesqp->cm_id)) { + cm_id = nesqp->cm_id; + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.status = IW_CM_EVENT_STATUS_TIMEOUT; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + + nes_debug(NES_DBG_QP, "Generating a CM Timeout Event for " + "QP%u. cm_id = %p, refcount = %u. \n", + nesqp->hwqp.qp_id, cm_id, atomic_read(&nesqp->refcount)); + + cm_id->rem_ref(cm_id); + ret = cm_id->event_handler(cm_id, &cm_event); + if (ret) + nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret); + } + + + if (nesqp->user_mode) { + if ((ibqp->uobject)&&(ibqp->uobject->context)) { + nes_ucontext = to_nesucontext(ibqp->uobject->context); + clear_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); + nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = NULL; + if (nes_ucontext->first_free_wq > nesqp->mmap_sq_db_index) { + nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index; + } + } + if (nesqp->pbl_pbase) + kunmap(nesqp->page); + } + + nes_rem_ref(&nesqp->ibqp); + return 0; +} + + +/** + * nes_create_cq + */ +static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, + int comp_vector, + struct ib_ucontext *context, struct ib_udata *udata) +{ + u64 u64temp; + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_cq *nescq; + struct nes_ucontext *nes_ucontext = NULL; + struct nes_cqp_request *cqp_request; + void *mem = NULL; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_pbl *nespbl = NULL; + struct nes_create_cq_req req; + struct nes_create_cq_resp resp; + u32 cq_num = 0; + u32 opcode = 0; + u32 pbl_entries = 1; + int err; + unsigned long flags; + int ret; + + err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs, + nesadapter->max_cq, &cq_num, &nesadapter->next_cq); + if (err) { + return ERR_PTR(err); + } + + nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL); + if (!nescq) { + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + nes_debug(NES_DBG_CQ, "Unable to allocate nes_cq struct\n"); + return ERR_PTR(-ENOMEM); + } + + nescq->hw_cq.cq_size = max(entries + 1, 5); + nescq->hw_cq.cq_number = cq_num; + nescq->ibcq.cqe = nescq->hw_cq.cq_size - 1; + + + if (context) { + nes_ucontext = to_nesucontext(context); + if (ib_copy_from_udata(&req, udata, sizeof (struct nes_create_cq_req))) { + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-EFAULT); + } + nesvnic->mcrq_ucontext = nes_ucontext; + nes_ucontext->mcrqf = req.mcrqf; + if (nes_ucontext->mcrqf) { + if (nes_ucontext->mcrqf & 0x80000000) + nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 12 + (nes_ucontext->mcrqf & 0xf) - 1; + else if (nes_ucontext->mcrqf & 0x40000000) + nescq->hw_cq.cq_number = nes_ucontext->mcrqf & 0xffff; + else + nescq->hw_cq.cq_number = nesvnic->mcrq_qp_id + nes_ucontext->mcrqf-1; + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + } + nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n", + (unsigned long)req.user_cq_buffer, entries); + list_for_each_entry(nespbl, &nes_ucontext->cq_reg_mem_list, list) { + if (nespbl->user_base == (unsigned long )req.user_cq_buffer) { + list_del(&nespbl->list); + err = 0; + nes_debug(NES_DBG_CQ, "Found PBL for virtual CQ. nespbl=%p.\n", + nespbl); + break; + } + } + if (err) { + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(err); + } + + pbl_entries = nespbl->pbl_size >> 3; + nescq->cq_mem_size = 0; + } else { + nescq->cq_mem_size = nescq->hw_cq.cq_size * sizeof(struct nes_hw_cqe); + nes_debug(NES_DBG_CQ, "Attempting to allocate pci memory (%u entries, %u bytes) for CQ%u.\n", + entries, nescq->cq_mem_size, nescq->hw_cq.cq_number); + + /* allocate the physical buffer space */ + mem = pci_alloc_consistent(nesdev->pcidev, nescq->cq_mem_size, + &nescq->hw_cq.cq_pbase); + if (!mem) { + printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n"); + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-ENOMEM); + } + + memset(mem, 0, nescq->cq_mem_size); + nescq->hw_cq.cq_vbase = mem; + nescq->hw_cq.cq_head = 0; + nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n", + nescq->hw_cq.cq_number, nescq->hw_cq.cq_vbase, + (u32)nescq->hw_cq.cq_pbase); + } + + nescq->hw_cq.ce_handler = nes_iwarp_ce_handler; + spin_lock_init(&nescq->lock); + + /* send CreateCQ request to CQP */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n"); + if (!context) + pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, + nescq->hw_cq.cq_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-ENOMEM); + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + opcode = NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | + NES_CQP_CQ_CHK_OVERFLOW | + NES_CQP_CQ_CEQE_MASK | ((u32)nescq->hw_cq.cq_size << 16); + + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + + if (pbl_entries != 1) { + if (pbl_entries > 32) { + /* use 4k pbl */ + nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries); + if (nesadapter->free_4kpbl == 0) { + if (cqp_request->dynamic) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kfree(cqp_request); + } else { + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + if (!context) + pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, + nescq->hw_cq.cq_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-ENOMEM); + } else { + opcode |= (NES_CQP_CQ_VIRT | NES_CQP_CQ_4KB_CHUNK); + nescq->virtual_cq = 2; + nesadapter->free_4kpbl--; + } + } else { + /* use 256 byte pbl */ + nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries); + if (nesadapter->free_256pbl == 0) { + if (cqp_request->dynamic) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kfree(cqp_request); + } else { + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + if (!context) + pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, + nescq->hw_cq.cq_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-ENOMEM); + } else { + opcode |= NES_CQP_CQ_VIRT; + nescq->virtual_cq = 1; + nesadapter->free_256pbl--; + } + } + } + + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + (nescq->hw_cq.cq_number | ((u32)nesdev->ceq_index << 16))); + + if (context) { + if (pbl_entries != 1) + u64temp = (u64)nespbl->pbl_pbase; + else + u64temp = le64_to_cpu(nespbl->pbl_vbase[0]); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX, + nes_ucontext->mmap_db_index[0]); + } else { + u64temp = (u64)nescq->hw_cq.cq_pbase; + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; + } + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; + u64temp = (u64)(unsigned long)&nescq->hw_cq; + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = + cpu_to_le32((u32)(u64temp >> 1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + + /* Wait for CQP */ + nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n", + nescq->hw_cq.cq_number); + ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + NES_EVENT_TIMEOUT * 2); + nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n", + nescq->hw_cq.cq_number, ret); + if ((!ret) || (cqp_request->major_code)) { + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + nes_debug(NES_DBG_CQ, "iWARP CQ%u create timeout expired, major code = 0x%04X," + " minor code = 0x%04X\n", + nescq->hw_cq.cq_number, cqp_request->major_code, cqp_request->minor_code); + if (!context) + pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, + nescq->hw_cq.cq_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-EIO); + } else { + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + } + + if (context) { + /* free the nespbl */ + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + resp.cq_id = nescq->hw_cq.cq_number; + resp.cq_size = nescq->hw_cq.cq_size; + resp.mmap_db_index = 0; + if (ib_copy_to_udata(udata, &resp, sizeof resp)) { + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-EFAULT); + } + } + + return &nescq->ibcq; +} + + +/** + * nes_destroy_cq + */ +static int nes_destroy_cq(struct ib_cq *ib_cq) +{ + struct nes_cq *nescq; + struct nes_device *nesdev; + struct nes_vnic *nesvnic; + struct nes_adapter *nesadapter; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + unsigned long flags; + u32 opcode = 0; + int ret; + + if (ib_cq == NULL) + return 0; + + nescq = to_nescq(ib_cq); + nesvnic = to_nesvnic(ib_cq->device); + nesdev = nesvnic->nesdev; + nesadapter = nesdev->nesadapter; + + nes_debug(NES_DBG_CQ, "Destroy CQ%u\n", nescq->hw_cq.cq_number); + + /* Send DestroyCQ request to CQP */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + opcode = NES_CQP_DESTROY_CQ | (nescq->hw_cq.cq_size << 16); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (nescq->virtual_cq == 1) { + nesadapter->free_256pbl++; + if (nesadapter->free_256pbl > nesadapter->max_256pbl) { + printk(KERN_ERR PFX "%s: free 256B PBLs(%u) has exceeded the max(%u)\n", + __FUNCTION__, nesadapter->free_256pbl, nesadapter->max_256pbl); + } + } else if (nescq->virtual_cq == 2) { + nesadapter->free_4kpbl++; + if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) { + printk(KERN_ERR PFX "%s: free 4K PBLs(%u) has exceeded the max(%u)\n", + __FUNCTION__, nesadapter->free_4kpbl, nesadapter->max_4kpbl); + } + opcode |= NES_CQP_CQ_4KB_CHUNK; + } + + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16))); + nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number); + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + + /* Wait for CQP */ + nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n", + nescq->hw_cq.cq_number); + ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_CQ, "Destroy iWARP CQ%u completed, wait_event_timeout ret = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X.\n", + nescq->hw_cq.cq_number, ret, cqp_request->major_code, + cqp_request->minor_code); + if ((!ret) || (cqp_request->major_code)) { + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + if (!ret) { + nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n", + nescq->hw_cq.cq_number); + ret = -ETIME; + } else { + nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n", + nescq->hw_cq.cq_number); + ret = -EIO; + } + } else { + ret = 0; + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + } + + if (nescq->cq_mem_size) + pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, + (void *)nescq->hw_cq.cq_vbase, nescq->hw_cq.cq_pbase); + kfree(nescq); + + return ret; +} + + +/** + * nes_reg_mr + */ +static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, + u32 stag, u64 region_length, struct nes_root_vpbl *root_vpbl, + dma_addr_t single_buffer, u16 pbl_count, u16 residual_page_count, + int acc, u64 *iova_start) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + unsigned long flags; + int ret; + struct nes_adapter *nesadapter = nesdev->nesadapter; + /* int count; */ + u32 opcode = 0; + u16 major_code; + + /* Register the region with the adapter */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + /* track PBL resources */ + if (pbl_count != 0) { + if (pbl_count > 1) { + /* Two level PBL */ + if ((pbl_count+1) > nesadapter->free_4kpbl) { + nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n"); + if (cqp_request->dynamic) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kfree(cqp_request); + } else { + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + return -ENOMEM; + } else { + nesadapter->free_4kpbl -= pbl_count+1; + } + } else if (residual_page_count > 32) { + if (pbl_count > nesadapter->free_4kpbl) { + nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n"); + if (cqp_request->dynamic) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kfree(cqp_request); + } else { + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + return -ENOMEM; + } else { + nesadapter->free_4kpbl -= pbl_count; + } + } else { + if (pbl_count > nesadapter->free_256pbl) { + nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n"); + if (cqp_request->dynamic) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kfree(cqp_request); + } else { + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + return -ENOMEM; + } else { + nesadapter->free_256pbl -= pbl_count; + } + } + } + + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + + opcode = NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ | + NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR; + if (acc & IB_ACCESS_LOCAL_WRITE) + opcode |= NES_CQP_STAG_RIGHTS_LOCAL_WRITE; + if (acc & IB_ACCESS_REMOTE_WRITE) + opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_REM_ACC_EN; + if (acc & IB_ACCESS_REMOTE_READ) + opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_REM_ACC_EN; + if (acc & IB_ACCESS_MW_BIND) + opcode |= NES_CQP_STAG_RIGHTS_WINDOW_BIND | NES_CQP_STAG_REM_ACC_EN; + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, *iova_start); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, region_length); + + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] = + cpu_to_le32((u32)(region_length >> 8) & 0xff000000); + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |= + cpu_to_le32(nespd->pd_id & 0x00007fff); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); + + if (pbl_count == 0) { + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, single_buffer); + } else { + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, root_vpbl->pbl_pbase); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, pbl_count); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, + (((pbl_count - 1) * 4096) + (residual_page_count*8))); + + if ((pbl_count > 1) || (residual_page_count > 32)) + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE); + } + barrier(); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + + /* Wait for CQP */ + ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X.\n", + stag, ret, cqp_request->major_code, cqp_request->minor_code); + major_code = cqp_request->major_code; + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + if (!ret) + return -ETIME; + else if (major_code) + return -EIO; + else + return 0; + + return 0; +} + + +/** + * nes_reg_phys_mr + */ +static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, + struct ib_phys_buf *buffer_list, int num_phys_buf, int acc, + u64 * iova_start) +{ + u64 region_length; + struct nes_pd *nespd = to_nespd(ib_pd); + struct nes_vnic *nesvnic = to_nesvnic(ib_pd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_mr *nesmr; + struct ib_mr *ibmr; + struct nes_vpbl vpbl; + struct nes_root_vpbl root_vpbl; + u32 stag; + u32 i; + u32 stag_index = 0; + u32 next_stag_index = 0; + u32 driver_key = 0; + u32 root_pbl_index = 0; + u32 cur_pbl_index = 0; + int err = 0, pbl_depth = 0; + int ret = 0; + u16 pbl_count = 0; + u8 single_page = 1; + u8 stag_key = 0; + + pbl_depth = 0; + region_length = 0; + vpbl.pbl_vbase = NULL; + root_vpbl.pbl_vbase = NULL; + root_vpbl.pbl_pbase = 0; + + get_random_bytes(&next_stag_index, sizeof(next_stag_index)); + stag_key = (u8)next_stag_index; + + driver_key = 0; + + next_stag_index >>= 8; + next_stag_index %= nesadapter->max_mr; + if (num_phys_buf > (1024*512)) { + return ERR_PTR(-E2BIG); + } + + err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr, + &stag_index, &next_stag_index); + if (err) { + return ERR_PTR(err); + } + + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); + if (!nesmr) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < num_phys_buf; i++) { + + if ((i & 0x01FF) == 0) { + if (root_pbl_index == 1) { + /* Allocate the root PBL */ + root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192, + &root_vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", + root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); + if (!root_vpbl.pbl_vbase) { + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + kfree(nesmr); + return ERR_PTR(-ENOMEM); + } + root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL); + if (!root_vpbl.leaf_vpbl) { + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + kfree(nesmr); + return ERR_PTR(-ENOMEM); + } + root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[0].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); + root_vpbl.leaf_vpbl[0] = vpbl; + } + /* Allocate a 4K buffer for the PBL */ + vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, + &vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n", + vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase); + if (!vpbl.pbl_vbase) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + ibmr = ERR_PTR(-ENOMEM); + kfree(nesmr); + goto reg_phys_err; + } + /* Fill in the root table */ + if (1 <= root_pbl_index) { + root_vpbl.pbl_vbase[root_pbl_index].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[root_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); + root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; + } + root_pbl_index++; + cur_pbl_index = 0; + } + if (buffer_list[i].addr & ~PAGE_MASK) { + /* TODO: Unwind allocated buffers */ + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", + (unsigned int) buffer_list[i].addr); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_phys_err; + } + + if (!buffer_list[i].size) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_phys_err; + } + + region_length += buffer_list[i].size; + if ((i != 0) && (single_page)) { + if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr) + single_page = 0; + } + vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr); + vpbl.pbl_vbase[cur_pbl_index++].pa_high = + cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32))); + } + + stag = stag_index << 8; + stag |= driver_key; + stag += (u32)stag_key; + + nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%016lX," + " length = 0x%016lX, index = 0x%08X\n", + stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index); + + region_length -= (*iova_start)&PAGE_MASK; + + /* Make the leaf PBL the root if only one PBL */ + if (root_pbl_index == 1) { + root_vpbl.pbl_pbase = vpbl.pbl_pbase; + } + + if (single_page) { + pbl_count = 0; + } else { + pbl_count = root_pbl_index; + } + ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl, + buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start); + + if (ret == 0) { + nesmr->ibmr.rkey = stag; + nesmr->ibmr.lkey = stag; + nesmr->mode = IWNES_MEMREG_TYPE_MEM; + ibmr = &nesmr->ibmr; + nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0; + nesmr->pbls_used = pbl_count; + if (pbl_count > 1) { + nesmr->pbls_used++; + } + } else { + kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); + } + + reg_phys_err: + /* free the resources */ + if (root_pbl_index == 1) { + /* single PBL case */ + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase); + } else { + for (i=0; i<root_pbl_index; i++) { + pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase, + root_vpbl.leaf_vpbl[i].pbl_pbase); + } + kfree(root_vpbl.leaf_vpbl); + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + } + + return ibmr; +} + + +/** + * nes_get_dma_mr + */ +static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct ib_phys_buf bl; + u64 kva = 0; + + nes_debug(NES_DBG_MR, "\n"); + + bl.size = (u64)0xffffffffffULL; + bl.addr = 0; + return nes_reg_phys_mr(pd, &bl, 1, acc, &kva); +} + + +/** + * nes_reg_user_mr + */ +static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *udata) +{ + u64 iova_start; + __le64 *pbl; + u64 region_length; + dma_addr_t last_dma_addr = 0; + dma_addr_t first_dma_addr = 0; + struct nes_pd *nespd = to_nespd(pd); + struct nes_vnic *nesvnic = to_nesvnic(pd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct ib_mr *ibmr = ERR_PTR(-EINVAL); + struct ib_umem_chunk *chunk; + struct nes_ucontext *nes_ucontext; + struct nes_pbl *nespbl; + struct nes_mr *nesmr; + struct ib_umem *region; + struct nes_mem_reg_req req; + struct nes_vpbl vpbl; + struct nes_root_vpbl root_vpbl; + int nmap_index, page_index; + int page_count = 0; + int err, pbl_depth = 0; + int chunk_pages; + int ret; + u32 stag; + u32 stag_index = 0; + u32 next_stag_index; + u32 driver_key; + u32 root_pbl_index = 0; + u32 cur_pbl_index = 0; + u32 skip_pages; + u16 pbl_count; + u8 single_page = 1; + u8 stag_key; + + region = ib_umem_get(pd->uobject->context, start, length, acc); + if (IS_ERR(region)) { + return (struct ib_mr *)region; + } + + nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u," + " offset = %u, page size = %u.\n", + (unsigned long int)start, (unsigned long int)virt, (u32)length, + region->offset, region->page_size); + + skip_pages = ((u32)region->offset) >> 12; + + if (ib_copy_from_udata(&req, udata, sizeof(req))) + return ERR_PTR(-EFAULT); + nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type); + + switch (req.reg_type) { + case IWNES_MEMREG_TYPE_MEM: + pbl_depth = 0; + region_length = 0; + vpbl.pbl_vbase = NULL; + root_vpbl.pbl_vbase = NULL; + root_vpbl.pbl_pbase = 0; + + get_random_bytes(&next_stag_index, sizeof(next_stag_index)); + stag_key = (u8)next_stag_index; + + driver_key = next_stag_index & 0x70000000; + + next_stag_index >>= 8; + next_stag_index %= nesadapter->max_mr; + + err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, + nesadapter->max_mr, &stag_index, &next_stag_index); + if (err) { + ib_umem_release(region); + return ERR_PTR(err); + } + + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); + if (!nesmr) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + return ERR_PTR(-ENOMEM); + } + nesmr->region = region; + + list_for_each_entry(chunk, ®ion->chunk_list, list) { + nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n", + chunk->nents, chunk->nmap); + for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { + if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", + (unsigned int) sg_dma_address(&chunk->page_list[nmap_index])); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } + + if (!sg_dma_len(&chunk->page_list[nmap_index])) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); + nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } + + region_length += sg_dma_len(&chunk->page_list[nmap_index]); + chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12; + region_length -= skip_pages << 12; + for (page_index=skip_pages; page_index < chunk_pages; page_index++) { + skip_pages = 0; + if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length) + goto enough_pages; + if ((page_count&0x01FF) == 0) { + if (page_count>(1024*512)) { + ib_umem_release(region); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, + nesadapter->allocated_mrs, stag_index); + kfree(nesmr); + ibmr = ERR_PTR(-E2BIG); + goto reg_user_mr_err; + } + if (root_pbl_index == 1) { + root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, + 8192, &root_vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", + root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); + if (!root_vpbl.pbl_vbase) { + ib_umem_release(region); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); + kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); + goto reg_user_mr_err; + } + root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, + GFP_KERNEL); + if (!root_vpbl.leaf_vpbl) { + ib_umem_release(region); + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); + kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); + goto reg_user_mr_err; + } + root_vpbl.pbl_vbase[0].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[0].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); + root_vpbl.leaf_vpbl[0] = vpbl; + } + vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, + &vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", + vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); + if (!vpbl.pbl_vbase) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + ibmr = ERR_PTR(-ENOMEM); + kfree(nesmr); + goto reg_user_mr_err; + } + if (1 <= root_pbl_index) { + root_vpbl.pbl_vbase[root_pbl_index].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[root_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); + root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; + } + root_pbl_index++; + cur_pbl_index = 0; + } + if (single_page) { + if (page_count != 0) { + if ((last_dma_addr+4096) != + (sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096))) + single_page = 0; + last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096); + } else { + first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096); + last_dma_addr = first_dma_addr; + } + } + + vpbl.pbl_vbase[cur_pbl_index].pa_low = + cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096))); + vpbl.pbl_vbase[cur_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096))) >> 32))); + cur_pbl_index++; + page_count++; + } + } + } + enough_pages: + nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x," + " stag_key=0x%08x\n", + stag_index, driver_key, stag_key); + stag = stag_index << 8; + stag |= driver_key; + stag += (u32)stag_key; + if (stag == 0) { + stag = 1; + } + + iova_start = virt; + /* Make the leaf PBL the root if only one PBL */ + if (root_pbl_index == 1) { + root_vpbl.pbl_pbase = vpbl.pbl_pbase; + } + + if (single_page) { + pbl_count = 0; + } else { + pbl_count = root_pbl_index; + first_dma_addr = 0; + } + nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%08X, length = 0x%08X," + " index = 0x%08X, region->length=0x%08llx, pbl_count = %u\n", + stag, (unsigned int)iova_start, + (unsigned int)region_length, stag_index, + (unsigned long long)region->length, pbl_count); + ret = nes_reg_mr( nesdev, nespd, stag, region->length, &root_vpbl, + first_dma_addr, pbl_count, (u16)cur_pbl_index, acc, &iova_start); + + nes_debug(NES_DBG_MR, "ret=%d\n", ret); + + if (ret == 0) { + nesmr->ibmr.rkey = stag; + nesmr->ibmr.lkey = stag; + nesmr->mode = IWNES_MEMREG_TYPE_MEM; + ibmr = &nesmr->ibmr; + nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0; + nesmr->pbls_used = pbl_count; + if (pbl_count > 1) { + nesmr->pbls_used++; + } + } else { + ib_umem_release(region); + kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); + } + + reg_user_mr_err: + /* free the resources */ + if (root_pbl_index == 1) { + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + } else { + for (page_index=0; page_index<root_pbl_index; page_index++) { + pci_free_consistent(nesdev->pcidev, 4096, + root_vpbl.leaf_vpbl[page_index].pbl_vbase, + root_vpbl.leaf_vpbl[page_index].pbl_pbase); + } + kfree(root_vpbl.leaf_vpbl); + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + } + + nes_debug(NES_DBG_MR, "Leaving, ibmr=%p", ibmr); + + return ibmr; + break; + case IWNES_MEMREG_TYPE_QP: + case IWNES_MEMREG_TYPE_CQ: + nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL); + if (!nespbl) { + nes_debug(NES_DBG_MR, "Unable to allocate PBL\n"); + ib_umem_release(region); + return ERR_PTR(-ENOMEM); + } + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); + if (!nesmr) { + ib_umem_release(region); + kfree(nespbl); + nes_debug(NES_DBG_MR, "Unable to allocate nesmr\n"); + return ERR_PTR(-ENOMEM); + } + nesmr->region = region; + nes_ucontext = to_nesucontext(pd->uobject->context); + pbl_depth = region->length >> 12; + pbl_depth += (region->length & (4096-1)) ? 1 : 0; + nespbl->pbl_size = pbl_depth*sizeof(u64); + if (req.reg_type == IWNES_MEMREG_TYPE_QP) { + nes_debug(NES_DBG_MR, "Attempting to allocate QP PBL memory"); + } else { + nes_debug(NES_DBG_MR, "Attempting to allocate CP PBL memory"); + } + + nes_debug(NES_DBG_MR, " %u bytes, %u entries.\n", + nespbl->pbl_size, pbl_depth); + pbl = pci_alloc_consistent(nesdev->pcidev, nespbl->pbl_size, + &nespbl->pbl_pbase); + if (!pbl) { + ib_umem_release(region); + kfree(nesmr); + kfree(nespbl); + nes_debug(NES_DBG_MR, "Unable to allocate PBL memory\n"); + return ERR_PTR(-ENOMEM); + } + + nespbl->pbl_vbase = (u64 *)pbl; + nespbl->user_base = start; + nes_debug(NES_DBG_MR, "Allocated PBL memory, %u bytes, pbl_pbase=%p," + " pbl_vbase=%p user_base=0x%lx\n", + nespbl->pbl_size, (void *)nespbl->pbl_pbase, + (void*)nespbl->pbl_vbase, nespbl->user_base); + + list_for_each_entry(chunk, ®ion->chunk_list, list) { + for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { + chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12; + chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0; + nespbl->page = sg_page(&chunk->page_list[0]); + for (page_index=0; page_index<chunk_pages; page_index++) { + ((__le32 *)pbl)[0] = cpu_to_le32((u32) + (sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096))); + ((__le32 *)pbl)[1] = cpu_to_le32(((u64) + (sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096)))>>32); + nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, + (unsigned long long)*pbl, + le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0])); + pbl++; + } + } + } + if (req.reg_type == IWNES_MEMREG_TYPE_QP) { + list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list); + } else { + list_add_tail(&nespbl->list, &nes_ucontext->cq_reg_mem_list); + } + nesmr->ibmr.rkey = -1; + nesmr->ibmr.lkey = -1; + nesmr->mode = req.reg_type; + return &nesmr->ibmr; + break; + } + + return ERR_PTR(-ENOSYS); +} + + +/** + * nes_dereg_mr + */ +static int nes_dereg_mr(struct ib_mr *ib_mr) +{ + struct nes_mr *nesmr = to_nesmr(ib_mr); + struct nes_vnic *nesvnic = to_nesvnic(ib_mr->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + unsigned long flags; + int ret; + u16 major_code; + u16 minor_code; + + if (nesmr->region) { + ib_umem_release(nesmr->region); + } + if (nesmr->mode != IWNES_MEMREG_TYPE_MEM) { + kfree(nesmr); + return 0; + } + + /* Deallocate the region with the adapter */ + + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (nesmr->pbls_used != 0) { + if (nesmr->pbl_4k) { + nesadapter->free_4kpbl += nesmr->pbls_used; + if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) { + printk(KERN_ERR PFX "free 4KB PBLs(%u) has exceeded the max(%u)\n", + nesadapter->free_4kpbl, nesadapter->max_4kpbl); + } + } else { + nesadapter->free_256pbl += nesmr->pbls_used; + if (nesadapter->free_256pbl > nesadapter->max_256pbl) { + printk(KERN_ERR PFX "free 256B PBLs(%u) has exceeded the max(%u)\n", + nesadapter->free_256pbl, nesadapter->max_256pbl); + } + } + } + + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + NES_CQP_DEALLOCATE_STAG | NES_CQP_STAG_VA_TO | + NES_CQP_STAG_DEALLOC_PBLS | NES_CQP_STAG_MR); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + + /* Wait for CQP */ + nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey); + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_MR, "Deallocate STag 0x%08X completed, wait_event_timeout ret = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X\n", + ib_mr->rkey, ret, cqp_request->major_code, cqp_request->minor_code); + + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + (ib_mr->rkey & 0x0fffff00) >> 8); + + kfree(nesmr); + + major_code = cqp_request->major_code; + minor_code = cqp_request->minor_code; + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + if (!ret) { + nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag," + " ib_mr=%p, rkey = 0x%08X\n", + ib_mr, ib_mr->rkey); + return -ETIME; + } else if (major_code) { + nes_debug(NES_DBG_MR, "Error (0x%04X:0x%04X) while attempting" + " to destroy STag, ib_mr=%p, rkey = 0x%08X\n", + major_code, minor_code, ib_mr, ib_mr->rkey); + return -EIO; + } else + return 0; +} + + +/** + * show_rev + */ +static ssize_t show_rev(struct class_device *cdev, char *buf) +{ + struct nes_ib_device *nesibdev = + container_of(cdev, struct nes_ib_device, ibdev.class_dev); + struct nes_vnic *nesvnic = nesibdev->nesvnic; + + nes_debug(NES_DBG_INIT, "\n"); + return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev); +} + + +/** + * show_fw_ver + */ +static ssize_t show_fw_ver(struct class_device *cdev, char *buf) +{ + struct nes_ib_device *nesibdev = + container_of(cdev, struct nes_ib_device, ibdev.class_dev); + struct nes_vnic *nesvnic = nesibdev->nesvnic; + + nes_debug(NES_DBG_INIT, "\n"); + return sprintf(buf, "%x.%x.%x\n", + (int)(nesvnic->nesdev->nesadapter->fw_ver >> 32), + (int)(nesvnic->nesdev->nesadapter->fw_ver >> 16) & 0xffff, + (int)(nesvnic->nesdev->nesadapter->fw_ver & 0xffff)); +} + + +/** + * show_hca + */ +static ssize_t show_hca(struct class_device *cdev, char *buf) +{ + nes_debug(NES_DBG_INIT, "\n"); + return sprintf(buf, "NES020\n"); +} + + +/** + * show_board + */ +static ssize_t show_board(struct class_device *cdev, char *buf) +{ + nes_debug(NES_DBG_INIT, "\n"); + return sprintf(buf, "%.*s\n", 32, "NES020 Board ID"); +} + + +static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct class_device_attribute *nes_class_attributes[] = { + &class_device_attr_hw_rev, + &class_device_attr_fw_ver, + &class_device_attr_hca_type, + &class_device_attr_board_id +}; + + +/** + * nes_query_qp + */ +static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct nes_qp *nesqp = to_nesqp(ibqp); + + nes_debug(NES_DBG_QP, "\n"); + + attr->qp_access_flags = 0; + attr->cap.max_send_wr = nesqp->hwqp.sq_size; + attr->cap.max_recv_wr = nesqp->hwqp.rq_size; + attr->cap.max_recv_sge = 1; + if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) { + init_attr->cap.max_inline_data = 0; + } else { + init_attr->cap.max_inline_data = 64; + } + + init_attr->event_handler = nesqp->ibqp.event_handler; + init_attr->qp_context = nesqp->ibqp.qp_context; + init_attr->send_cq = nesqp->ibqp.send_cq; + init_attr->recv_cq = nesqp->ibqp.recv_cq; + init_attr->srq = nesqp->ibqp.srq = nesqp->ibqp.srq; + init_attr->cap = attr->cap; + + return 0; +} + + +/** + * nes_hw_modify_qp + */ +int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, + u32 next_iwarp_state, u32 wait_completion) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + /* struct iw_cm_id *cm_id = nesqp->cm_id; */ + /* struct iw_cm_event cm_event; */ + struct nes_cqp_request *cqp_request; + unsigned long flags; + int ret; + u16 major_code; + + nes_debug(NES_DBG_MOD_QP, "QP%u, refcount=%d\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); + + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_MOD_QP, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + if (wait_completion) { + cqp_request->waiting = 1; + } else { + cqp_request->waiting = 0; + } + cqp_wqe = &cqp_request->cqp_wqe; + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + NES_CQP_MODIFY_QP | NES_CQP_QP_TYPE_IWARP | next_iwarp_state); + nes_debug(NES_DBG_MOD_QP, "using next_iwarp_state=%08x, wqe_words=%08x\n", + next_iwarp_state, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])); + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + + /* Wait for CQP */ + if (wait_completion) { + /* nes_debug(NES_DBG_MOD_QP, "Waiting for modify iWARP QP%u to complete.\n", + nesqp->hwqp.qp_id); */ + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u completed, wait_event_timeout ret=%u, " + "CQP Major:Minor codes = 0x%04X:0x%04X.\n", + nesqp->hwqp.qp_id, ret, cqp_request->major_code, cqp_request->minor_code); + major_code = cqp_request->major_code; + if (major_code) { + nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u failed" + "CQP Major:Minor codes = 0x%04X:0x%04X, intended next state = 0x%08X.\n", + nesqp->hwqp.qp_id, cqp_request->major_code, + cqp_request->minor_code, next_iwarp_state); + } + if (atomic_dec_and_test(&cqp_request->refcount)) { + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + } + if (!ret) + return -ETIME; + else if (major_code) + return -EIO; + else + return 0; + } else { + return 0; + } +} + + +/** + * nes_modify_qp + */ +int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct nes_qp *nesqp = to_nesqp(ibqp); + struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); + struct nes_device *nesdev = nesvnic->nesdev; + /* u32 cqp_head; */ + /* u32 counter; */ + u32 next_iwarp_state = 0; + int err; + unsigned long qplockflags; + int ret; + u16 original_last_aeq; + u8 issue_modify_qp = 0; + u8 issue_disconnect = 0; + u8 dont_wait = 0; + + nes_debug(NES_DBG_MOD_QP, "QP%u: QP State=%u, cur QP State=%u," + " iwarp_state=0x%X, refcount=%d\n", + nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state, + nesqp->iwarp_state, atomic_read(&nesqp->refcount)); + + nes_add_ref(&nesqp->ibqp); + spin_lock_irqsave(&nesqp->lock, qplockflags); + + nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X," + " QP Access Flags=0x%X, attr_mask = 0x%0x\n", + nesqp->hwqp.qp_id, nesqp->hw_iwarp_state, + nesqp->hw_tcp_state, attr->qp_access_flags, attr_mask); + + if (attr_mask & IB_QP_STATE) { + switch (attr->qp_state) { + case IB_QPS_INIT: + nes_debug(NES_DBG_MOD_QP, "QP%u: new state = init\n", + nesqp->hwqp.qp_id); + if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_rem_ref(&nesqp->ibqp); + return -EINVAL; + } + next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; + issue_modify_qp = 1; + break; + case IB_QPS_RTR: + nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rtr\n", + nesqp->hwqp.qp_id); + if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_rem_ref(&nesqp->ibqp); + return -EINVAL; + } + next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; + issue_modify_qp = 1; + break; + case IB_QPS_RTS: + nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rts\n", + nesqp->hwqp.qp_id); + if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_rem_ref(&nesqp->ibqp); + return -EINVAL; + } + if (nesqp->cm_id == NULL) { + nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n", + nesqp->hwqp.qp_id ); + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_rem_ref(&nesqp->ibqp); + return -EINVAL; + } + next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS; + if (nesqp->iwarp_state != NES_CQP_QP_IWARP_STATE_RTS) + next_iwarp_state |= NES_CQP_QP_CONTEXT_VALID | + NES_CQP_QP_ARP_VALID | NES_CQP_QP_ORD_VALID; + issue_modify_qp = 1; + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_ESTABLISHED; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_RTS; + nesqp->hte_added = 1; + break; + case IB_QPS_SQD: + issue_modify_qp = 1; + nes_debug(NES_DBG_MOD_QP, "QP%u: new state=closing. SQ head=%u, SQ tail=%u\n", + nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail); + if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_rem_ref(&nesqp->ibqp); + return 0; + } else { + if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { + nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing" + " ignored due to current iWARP state\n", + nesqp->hwqp.qp_id); + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_rem_ref(&nesqp->ibqp); + return -EINVAL; + } + if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) { + nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing" + " already done based on hw state.\n", + nesqp->hwqp.qp_id); + issue_modify_qp = 0; + nesqp->in_disconnect = 0; + } + switch (nesqp->hw_iwarp_state) { + case NES_AEQE_IWARP_STATE_CLOSING: + next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; + case NES_AEQE_IWARP_STATE_TERMINATE: + next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; + break; + case NES_AEQE_IWARP_STATE_ERROR: + next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; + break; + default: + next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; + nesqp->in_disconnect = 1; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; + break; + } + } + break; + case IB_QPS_SQE: + nes_debug(NES_DBG_MOD_QP, "QP%u: new state = terminate\n", + nesqp->hwqp.qp_id); + if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_rem_ref(&nesqp->ibqp); + return -EINVAL; + } + /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ + next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE; + issue_modify_qp = 1; + nesqp->in_disconnect = 1; + break; + case IB_QPS_ERR: + case IB_QPS_RESET: + if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_rem_ref(&nesqp->ibqp); + return -EINVAL; + } + nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n", + nesqp->hwqp.qp_id); + next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; + /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ + if (nesqp->hte_added) { + nes_debug(NES_DBG_MOD_QP, "set CQP_QP_DEL_HTE\n"); + next_iwarp_state |= NES_CQP_QP_DEL_HTE; + nesqp->hte_added = 0; + } + if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) && + (nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) { + next_iwarp_state |= NES_CQP_QP_RESET; + nesqp->in_disconnect = 1; + } else { + nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n", + nesqp->hwqp.qp_id, nesqp->hw_tcp_state); + dont_wait = 1; + } + issue_modify_qp = 1; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR; + break; + default: + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_rem_ref(&nesqp->ibqp); + return -EINVAL; + break; + } + + nesqp->ibqp_state = attr->qp_state; + if (((nesqp->iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == + (u32)NES_CQP_QP_IWARP_STATE_RTS) && + ((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) > + (u32)NES_CQP_QP_IWARP_STATE_RTS)) { + nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; + nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", + nesqp->iwarp_state); + issue_disconnect = 1; + } else { + nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; + nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", + nesqp->iwarp_state); + } + } + + if (attr_mask & IB_QP_ACCESS_FLAGS) { + if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE) { + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN | + NES_QPCONTEXT_MISC_RDMA_READ_EN); + issue_modify_qp = 1; + } + if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) { + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN); + issue_modify_qp = 1; + } + if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) { + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_READ_EN); + issue_modify_qp = 1; + } + if (attr->qp_access_flags & IB_ACCESS_MW_BIND) { + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WBIND_EN); + issue_modify_qp = 1; + } + + if (nesqp->user_mode) { + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN | + NES_QPCONTEXT_MISC_RDMA_READ_EN); + issue_modify_qp = 1; + } + } + + original_last_aeq = nesqp->last_aeq; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + + nes_debug(NES_DBG_MOD_QP, "issue_modify_qp=%u\n", issue_modify_qp); + + ret = 0; + + + if (issue_modify_qp) { + nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n"); + ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 1); + if (ret) + nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)" + " failed for QP%u.\n", + next_iwarp_state, nesqp->hwqp.qp_id); + + } + + if ((issue_modify_qp) && (nesqp->ibqp_state > IB_QPS_RTS)) { + nes_debug(NES_DBG_MOD_QP, "QP%u Issued ModifyQP refcount (%d)," + " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + if ((!ret) || + ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) && + (ret))) { + if (dont_wait) { + if (nesqp->cm_id && nesqp->hw_tcp_state != 0) { + nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d)," + " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + /* this one is for the cm_disconnect thread */ + nes_add_ref(&nesqp->ibqp); + spin_lock_irqsave(&nesqp->lock, qplockflags); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_cm_disconn(nesqp); + } else { + nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); + nes_rem_ref(&nesqp->ibqp); + } + } else { + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + /* These two are for the timer thread */ + if (atomic_inc_return(&nesqp->close_timer_started) == 1) { + nes_add_ref(&nesqp->ibqp); + nesqp->cm_id->add_ref(nesqp->cm_id); + nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d)," + " need ae to finish up, original_last_aeq = 0x%04X." + " last_aeq = 0x%04X, scheduling timer.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + schedule_nes_timer(nesqp->cm_node, (struct sk_buff *) nesqp, NES_TIMER_TYPE_CLOSE, 1, 0); + } + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + } else { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d)," + " need ae to finish up, original_last_aeq = 0x%04X." + " last_aeq = 0x%04X.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + } + } + } else { + nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up," + " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + nes_rem_ref(&nesqp->ibqp); + } + } else { + nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up," + " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + nes_rem_ref(&nesqp->ibqp); + } + + err = 0; + + nes_debug(NES_DBG_MOD_QP, "QP%u Leaving, refcount=%d\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); + + return err; +} + + +/** + * nes_muticast_attach + */ +static int nes_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + nes_debug(NES_DBG_INIT, "\n"); + return -ENOSYS; +} + + +/** + * nes_multicast_detach + */ +static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + nes_debug(NES_DBG_INIT, "\n"); + return -ENOSYS; +} + + +/** + * nes_process_mad + */ +static int nes_process_mad(struct ib_device *ibdev, int mad_flags, + u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + nes_debug(NES_DBG_INIT, "\n"); + return -ENOSYS; +} + +static inline void +fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselkey) +{ + int sge_index; + int total_payload_length = 0; + for (sge_index = 0; sge_index < ib_wr->num_sge; sge_index++) { + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX+(sge_index*4), + ib_wr->sg_list[sge_index].addr); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_LENGTH0_IDX + (sge_index*4), + ib_wr->sg_list[sge_index].length); + if (uselkey) + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4), + (ib_wr->sg_list[sge_index].lkey)); + else + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4), 0); + + total_payload_length += ib_wr->sg_list[sge_index].length; + } + nes_debug(NES_DBG_IW_TX, "UC UC UC, sending total_payload_length=%u \n", + total_payload_length); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, + total_payload_length); +} + +/** + * nes_post_send + */ +static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, + struct ib_send_wr **bad_wr) +{ + u64 u64temp; + unsigned long flags = 0; + struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_qp *nesqp = to_nesqp(ibqp); + struct nes_hw_qp_wqe *wqe; + int err; + u32 qsize = nesqp->hwqp.sq_size; + u32 head; + u32 wqe_misc; + u32 wqe_count; + u32 counter; + u32 total_payload_length; + + err = 0; + wqe_misc = 0; + wqe_count = 0; + total_payload_length = 0; + + if (nesqp->ibqp_state > IB_QPS_RTS) + return -EINVAL; + + spin_lock_irqsave(&nesqp->lock, flags); + + head = nesqp->hwqp.sq_head; + + while (ib_wr) { + /* Check for SQ overflow */ + if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { + err = -EINVAL; + break; + } + + wqe = &nesqp->hwqp.sq_vbase[head]; + /* nes_debug(NES_DBG_IW_TX, "processing sq wqe for QP%u at %p, head = %u.\n", + nesqp->hwqp.qp_id, wqe, head); */ + nes_fill_init_qp_wqe(wqe, nesqp, head); + u64temp = (u64)(ib_wr->wr_id); + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, + u64temp); + switch (ib_wr->opcode) { + case IB_WR_SEND: + if (ib_wr->send_flags & IB_SEND_SOLICITED) { + wqe_misc = NES_IWARP_SQ_OP_SENDSE; + } else { + wqe_misc = NES_IWARP_SQ_OP_SEND; + } + if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { + err = -EINVAL; + break; + } + if (ib_wr->send_flags & IB_SEND_FENCE) { + wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; + } + if ((ib_wr->send_flags & IB_SEND_INLINE) && + ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && + (ib_wr->sg_list[0].length <= 64)) { + memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], + (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, + ib_wr->sg_list[0].length); + wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA; + } else { + fill_wqe_sg_send(wqe, ib_wr, 1); + } + + break; + case IB_WR_RDMA_WRITE: + wqe_misc = NES_IWARP_SQ_OP_RDMAW; + if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { + nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n", + ib_wr->num_sge, + nesdev->nesadapter->max_sge); + err = -EINVAL; + break; + } + if (ib_wr->send_flags & IB_SEND_FENCE) { + wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; + } + + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, + ib_wr->wr.rdma.rkey); + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, + ib_wr->wr.rdma.remote_addr); + + if ((ib_wr->send_flags & IB_SEND_INLINE) && + ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && + (ib_wr->sg_list[0].length <= 64)) { + memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], + (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, + ib_wr->sg_list[0].length); + wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA; + } else { + fill_wqe_sg_send(wqe, ib_wr, 1); + } + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]; + break; + case IB_WR_RDMA_READ: + /* iWARP only supports 1 sge for RDMA reads */ + if (ib_wr->num_sge > 1) { + nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n", + ib_wr->num_sge); + err = -EINVAL; + break; + } + wqe_misc = NES_IWARP_SQ_OP_RDMAR; + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, + ib_wr->wr.rdma.remote_addr); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, + ib_wr->wr.rdma.rkey); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX, + ib_wr->sg_list->length); + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, + ib_wr->sg_list->addr); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX, + ib_wr->sg_list->lkey); + break; + default: + /* error */ + err = -EINVAL; + break; + } + + if (ib_wr->send_flags & IB_SEND_SIGNALED) { + wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL; + } + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc); + + ib_wr = ib_wr->next; + head++; + wqe_count++; + if (head >= qsize) + head = 0; + + } + + nesqp->hwqp.sq_head = head; + barrier(); + while (wqe_count) { + counter = min(wqe_count, ((u32)255)); + wqe_count -= counter; + nes_write32(nesdev->regs + NES_WQE_ALLOC, + (counter << 24) | 0x00800000 | nesqp->hwqp.qp_id); + } + + spin_unlock_irqrestore(&nesqp->lock, flags); + + if (err) + *bad_wr = ib_wr; + return err; +} + + +/** + * nes_post_recv + */ +static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, + struct ib_recv_wr **bad_wr) +{ + u64 u64temp; + unsigned long flags = 0; + struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_qp *nesqp = to_nesqp(ibqp); + struct nes_hw_qp_wqe *wqe; + int err = 0; + int sge_index; + u32 qsize = nesqp->hwqp.rq_size; + u32 head; + u32 wqe_count = 0; + u32 counter; + u32 total_payload_length; + + if (nesqp->ibqp_state > IB_QPS_RTS) + return -EINVAL; + + spin_lock_irqsave(&nesqp->lock, flags); + + head = nesqp->hwqp.rq_head; + + while (ib_wr) { + if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { + err = -EINVAL; + break; + } + /* Check for RQ overflow */ + if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) { + err = -EINVAL; + break; + } + + nes_debug(NES_DBG_IW_RX, "ibwr sge count = %u.\n", ib_wr->num_sge); + wqe = &nesqp->hwqp.rq_vbase[head]; + + /* nes_debug(NES_DBG_IW_RX, "QP%u:processing rq wqe at %p, head = %u.\n", + nesqp->hwqp.qp_id, wqe, head); */ + nes_fill_init_qp_wqe(wqe, nesqp, head); + u64temp = (u64)(ib_wr->wr_id); + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, + u64temp); + total_payload_length = 0; + for (sge_index=0; sge_index < ib_wr->num_sge; sge_index++) { + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_FRAG0_LOW_IDX+(sge_index*4), + ib_wr->sg_list[sge_index].addr); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_LENGTH0_IDX+(sge_index*4), + ib_wr->sg_list[sge_index].length); + set_wqe_32bit_value(wqe->wqe_words,NES_IWARP_RQ_WQE_STAG0_IDX+(sge_index*4), + ib_wr->sg_list[sge_index].lkey); + + total_payload_length += ib_wr->sg_list[sge_index].length; + } + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX, + total_payload_length); + + ib_wr = ib_wr->next; + head++; + wqe_count++; + if (head >= qsize) + head = 0; + } + + nesqp->hwqp.rq_head = head; + barrier(); + while (wqe_count) { + counter = min(wqe_count, ((u32)255)); + wqe_count -= counter; + nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter<<24) | nesqp->hwqp.qp_id); + } + + spin_unlock_irqrestore(&nesqp->lock, flags); + + if (err) + *bad_wr = ib_wr; + return err; +} + + +/** + * nes_poll_cq + */ +static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) +{ + u64 u64temp; + u64 wrid; + /* u64 u64temp; */ + unsigned long flags = 0; + struct nes_vnic *nesvnic = to_nesvnic(ibcq->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_cq *nescq = to_nescq(ibcq); + struct nes_qp *nesqp; + struct nes_hw_cqe cqe; + u32 head; + u32 wq_tail; + u32 cq_size; + u32 cqe_count = 0; + u32 wqe_index; + u32 u32temp; + /* u32 counter; */ + + nes_debug(NES_DBG_CQ, "\n"); + + spin_lock_irqsave(&nescq->lock, flags); + + head = nescq->hw_cq.cq_head; + cq_size = nescq->hw_cq.cq_size; + + while (cqe_count < num_entries) { + if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & + NES_CQE_VALID) { + cqe = nescq->hw_cq.cq_vbase[head]; + nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; + u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); + wqe_index = u32temp & + (nesdev->nesadapter->max_qp_wr - 1); + u32temp &= ~(NES_SW_CONTEXT_ALIGN-1); + /* parse CQE, get completion context from WQE (either rq or sq */ + u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) | + ((u64)u32temp); + nesqp = *((struct nes_qp **)&u64temp); + memset(entry, 0, sizeof *entry); + if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) { + entry->status = IB_WC_SUCCESS; + } else { + entry->status = IB_WC_WR_FLUSH_ERR; + } + + entry->qp = &nesqp->ibqp; + entry->src_qp = nesqp->hwqp.qp_id; + + if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) { + if (nesqp->skip_lsmm) { + nesqp->skip_lsmm = 0; + wq_tail = nesqp->hwqp.sq_tail++; + } + + /* Working on a SQ Completion*/ + wq_tail = wqe_index; + nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1); + wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail]. + wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) | + ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail]. + wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX]))); + entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail]. + wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]); + + switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail]. + wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) { + case NES_IWARP_SQ_OP_RDMAW: + nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n"); + entry->opcode = IB_WC_RDMA_WRITE; + break; + case NES_IWARP_SQ_OP_RDMAR: + nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n"); + entry->opcode = IB_WC_RDMA_READ; + entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail]. + wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]); + break; + case NES_IWARP_SQ_OP_SENDINV: + case NES_IWARP_SQ_OP_SENDSEINV: + case NES_IWARP_SQ_OP_SEND: + case NES_IWARP_SQ_OP_SENDSE: + nes_debug(NES_DBG_CQ, "Operation = Send.\n"); + entry->opcode = IB_WC_SEND; + break; + } + } else { + /* Working on a RQ Completion*/ + wq_tail = wqe_index; + nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1); + entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]); + wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) | + ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32); + entry->opcode = IB_WC_RECV; + } + entry->wr_id = wrid; + + if (++head >= cq_size) + head = 0; + cqe_count++; + nescq->polled_completions++; + if ((nescq->polled_completions > (cq_size / 2)) || + (nescq->polled_completions == 255)) { + nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes" + " are pending %u of %u.\n", + nescq->hw_cq.cq_number, nescq->polled_completions, cq_size); + nes_write32(nesdev->regs+NES_CQE_ALLOC, + nescq->hw_cq.cq_number | (nescq->polled_completions << 16)); + nescq->polled_completions = 0; + } + entry++; + } else + break; + } + + if (nescq->polled_completions) { + nes_write32(nesdev->regs+NES_CQE_ALLOC, + nescq->hw_cq.cq_number | (nescq->polled_completions << 16)); + nescq->polled_completions = 0; + } + + nescq->hw_cq.cq_head = head; + nes_debug(NES_DBG_CQ, "Reporting %u completions for CQ%u.\n", + cqe_count, nescq->hw_cq.cq_number); + + spin_unlock_irqrestore(&nescq->lock, flags); + + return cqe_count; +} + + +/** + * nes_req_notify_cq + */ +static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) + { + struct nes_vnic *nesvnic = to_nesvnic(ibcq->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_cq *nescq = to_nescq(ibcq); + u32 cq_arm; + + nes_debug(NES_DBG_CQ, "Requesting notification for CQ%u.\n", + nescq->hw_cq.cq_number); + + cq_arm = nescq->hw_cq.cq_number; + if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP) + cq_arm |= NES_CQE_ALLOC_NOTIFY_NEXT; + else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) + cq_arm |= NES_CQE_ALLOC_NOTIFY_SE; + else + return -EINVAL; + + nes_write32(nesdev->regs+NES_CQE_ALLOC, cq_arm); + nes_read32(nesdev->regs+NES_CQE_ALLOC); + + return 0; +} + + +/** + * nes_init_ofa_device + */ +struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) +{ + struct nes_ib_device *nesibdev; + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + + nesibdev = (struct nes_ib_device *)ib_alloc_device(sizeof(struct nes_ib_device)); + if (nesibdev == NULL) { + return NULL; + } + strlcpy(nesibdev->ibdev.name, "nes%d", IB_DEVICE_NAME_MAX); + nesibdev->ibdev.owner = THIS_MODULE; + + nesibdev->ibdev.node_type = RDMA_NODE_RNIC; + memset(&nesibdev->ibdev.node_guid, 0, sizeof(nesibdev->ibdev.node_guid)); + memcpy(&nesibdev->ibdev.node_guid, netdev->dev_addr, 6); + + nesibdev->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_BIND_MW) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_POST_SEND); + + nesibdev->ibdev.phys_port_cnt = 1; + nesibdev->ibdev.num_comp_vectors = 1; + nesibdev->ibdev.dma_device = &nesdev->pcidev->dev; + nesibdev->ibdev.class_dev.dev = &nesdev->pcidev->dev; + nesibdev->ibdev.query_device = nes_query_device; + nesibdev->ibdev.query_port = nes_query_port; + nesibdev->ibdev.modify_port = nes_modify_port; + nesibdev->ibdev.query_pkey = nes_query_pkey; + nesibdev->ibdev.query_gid = nes_query_gid; + nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext; + nesibdev->ibdev.dealloc_ucontext = nes_dealloc_ucontext; + nesibdev->ibdev.mmap = nes_mmap; + nesibdev->ibdev.alloc_pd = nes_alloc_pd; + nesibdev->ibdev.dealloc_pd = nes_dealloc_pd; + nesibdev->ibdev.create_ah = nes_create_ah; + nesibdev->ibdev.destroy_ah = nes_destroy_ah; + nesibdev->ibdev.create_qp = nes_create_qp; + nesibdev->ibdev.modify_qp = nes_modify_qp; + nesibdev->ibdev.query_qp = nes_query_qp; + nesibdev->ibdev.destroy_qp = nes_destroy_qp; + nesibdev->ibdev.create_cq = nes_create_cq; + nesibdev->ibdev.destroy_cq = nes_destroy_cq; + nesibdev->ibdev.poll_cq = nes_poll_cq; + nesibdev->ibdev.get_dma_mr = nes_get_dma_mr; + nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr; + nesibdev->ibdev.reg_user_mr = nes_reg_user_mr; + nesibdev->ibdev.dereg_mr = nes_dereg_mr; + nesibdev->ibdev.alloc_mw = nes_alloc_mw; + nesibdev->ibdev.dealloc_mw = nes_dealloc_mw; + nesibdev->ibdev.bind_mw = nes_bind_mw; + + nesibdev->ibdev.alloc_fmr = nes_alloc_fmr; + nesibdev->ibdev.unmap_fmr = nes_unmap_fmr; + nesibdev->ibdev.dealloc_fmr = nes_dealloc_fmr; + nesibdev->ibdev.map_phys_fmr = nes_map_phys_fmr; + + nesibdev->ibdev.attach_mcast = nes_multicast_attach; + nesibdev->ibdev.detach_mcast = nes_multicast_detach; + nesibdev->ibdev.process_mad = nes_process_mad; + + nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; + nesibdev->ibdev.post_send = nes_post_send; + nesibdev->ibdev.post_recv = nes_post_recv; + + nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); + if (nesibdev->ibdev.iwcm == NULL) { + ib_dealloc_device(&nesibdev->ibdev); + return NULL; + } + nesibdev->ibdev.iwcm->add_ref = nes_add_ref; + nesibdev->ibdev.iwcm->rem_ref = nes_rem_ref; + nesibdev->ibdev.iwcm->get_qp = nes_get_qp; + nesibdev->ibdev.iwcm->connect = nes_connect; + nesibdev->ibdev.iwcm->accept = nes_accept; + nesibdev->ibdev.iwcm->reject = nes_reject; + nesibdev->ibdev.iwcm->create_listen = nes_create_listen; + nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; + + return nesibdev; +} + + +/** + * nes_destroy_ofa_device + */ +void nes_destroy_ofa_device(struct nes_ib_device *nesibdev) +{ + if (nesibdev == NULL) + return; + + nes_unregister_ofa_device(nesibdev); + + kfree(nesibdev->ibdev.iwcm); + ib_dealloc_device(&nesibdev->ibdev); +} + + +/** + * nes_register_ofa_device + */ +int nes_register_ofa_device(struct nes_ib_device *nesibdev) +{ + struct nes_vnic *nesvnic = nesibdev->nesvnic; + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + int i, ret; + + ret = ib_register_device(&nesvnic->nesibdev->ibdev); + if (ret) { + return ret; + } + + /* Get the resources allocated to this device */ + nesibdev->max_cq = (nesadapter->max_cq-NES_FIRST_QPN) / nesadapter->port_count; + nesibdev->max_mr = nesadapter->max_mr / nesadapter->port_count; + nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count; + nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count; + + for (i = 0; i < ARRAY_SIZE(nes_class_attributes); ++i) { + ret = class_device_create_file(&nesibdev->ibdev.class_dev, nes_class_attributes[i]); + if (ret) { + while (i > 0) { + i--; + class_device_remove_file(&nesibdev->ibdev.class_dev, + nes_class_attributes[i]); + } + ib_unregister_device(&nesibdev->ibdev); + return ret; + } + } + + nesvnic->of_device_registered = 1; + + return 0; +} + + +/** + * nes_unregister_ofa_device + */ +void nes_unregister_ofa_device(struct nes_ib_device *nesibdev) +{ + struct nes_vnic *nesvnic = nesibdev->nesvnic; + int i; + + if (nesibdev == NULL) + return; + + for (i = 0; i < ARRAY_SIZE(nes_class_attributes); ++i) { + class_device_remove_file(&nesibdev->ibdev.class_dev, nes_class_attributes[i]); + } + + if (nesvnic->of_device_registered) { + ib_unregister_device(&nesibdev->ibdev); + } + + nesvnic->of_device_registered = 0; +} diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h new file mode 100644 index 000000000000..6c6b4da5184f --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef NES_VERBS_H +#define NES_VERBS_H + +struct nes_device; + +#define NES_MAX_USER_DB_REGIONS 4096 +#define NES_MAX_USER_WQ_REGIONS 4096 + +struct nes_ucontext { + struct ib_ucontext ibucontext; + struct nes_device *nesdev; + unsigned long mmap_wq_offset; + unsigned long mmap_cq_offset; /* to be removed */ + int index; /* rnic index (minor) */ + unsigned long allocated_doorbells[BITS_TO_LONGS(NES_MAX_USER_DB_REGIONS)]; + u16 mmap_db_index[NES_MAX_USER_DB_REGIONS]; + u16 first_free_db; + unsigned long allocated_wqs[BITS_TO_LONGS(NES_MAX_USER_WQ_REGIONS)]; + struct nes_qp *mmap_nesqp[NES_MAX_USER_WQ_REGIONS]; + u16 first_free_wq; + struct list_head cq_reg_mem_list; + struct list_head qp_reg_mem_list; + u32 mcrqf; + atomic_t usecnt; +}; + +struct nes_pd { + struct ib_pd ibpd; + u16 pd_id; + atomic_t sqp_count; + u16 mmap_db_index; +}; + +struct nes_mr { + union { + struct ib_mr ibmr; + struct ib_mw ibmw; + struct ib_fmr ibfmr; + }; + struct ib_umem *region; + u16 pbls_used; + u8 mode; + u8 pbl_4k; +}; + +struct nes_hw_pb { + __le32 pa_low; + __le32 pa_high; +}; + +struct nes_vpbl { + dma_addr_t pbl_pbase; + struct nes_hw_pb *pbl_vbase; +}; + +struct nes_root_vpbl { + dma_addr_t pbl_pbase; + struct nes_hw_pb *pbl_vbase; + struct nes_vpbl *leaf_vpbl; +}; + +struct nes_fmr { + struct nes_mr nesmr; + u32 leaf_pbl_cnt; + struct nes_root_vpbl root_vpbl; + struct ib_qp *ib_qp; + int access_rights; + struct ib_fmr_attr attr; +}; + +struct nes_av; + +struct nes_cq { + struct ib_cq ibcq; + struct nes_hw_cq hw_cq; + u32 polled_completions; + u32 cq_mem_size; + spinlock_t lock; + u8 virtual_cq; + u8 pad[3]; +}; + +struct nes_wq { + spinlock_t lock; +}; + +struct iw_cm_id; +struct ietf_mpa_frame; + +struct nes_qp { + struct ib_qp ibqp; + void *allocated_buffer; + struct iw_cm_id *cm_id; + struct workqueue_struct *wq; + struct work_struct disconn_work; + struct nes_cq *nesscq; + struct nes_cq *nesrcq; + struct nes_pd *nespd; + void *cm_node; /* handle of the node this QP is associated with */ + struct ietf_mpa_frame *ietf_frame; + dma_addr_t ietf_frame_pbase; + wait_queue_head_t state_waitq; + unsigned long socket; + struct nes_hw_qp hwqp; + struct work_struct work; + struct work_struct ae_work; + enum ib_qp_state ibqp_state; + u32 iwarp_state; + u32 hte_index; + u32 last_aeq; + u32 qp_mem_size; + atomic_t refcount; + atomic_t close_timer_started; + u32 mmap_sq_db_index; + u32 mmap_rq_db_index; + spinlock_t lock; + struct nes_qp_context *nesqp_context; + dma_addr_t nesqp_context_pbase; + void *pbl_vbase; + dma_addr_t pbl_pbase; + struct page *page; + wait_queue_head_t kick_waitq; + u16 in_disconnect; + u16 private_data_len; + u8 active_conn; + u8 skip_lsmm; + u8 user_mode; + u8 hte_added; + u8 hw_iwarp_state; + u8 flush_issued; + u8 hw_tcp_state; + u8 disconn_pending; + u8 destroyed; +}; +#endif /* NES_VERBS_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index eb7edab0e836..fe250c60607d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -56,42 +56,43 @@ /* constants */ enum { - IPOIB_PACKET_SIZE = 2048, - IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES, + IPOIB_PACKET_SIZE = 2048, + IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES, - IPOIB_ENCAP_LEN = 4, + IPOIB_ENCAP_LEN = 4, - IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ - IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, - IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, - IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, - IPOIB_RX_RING_SIZE = 128, - IPOIB_TX_RING_SIZE = 64, + IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ + IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, + IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, + IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, + IPOIB_RX_RING_SIZE = 128, + IPOIB_TX_RING_SIZE = 64, IPOIB_MAX_QUEUE_SIZE = 8192, IPOIB_MIN_QUEUE_SIZE = 2, + IPOIB_CM_MAX_CONN_QP = 4096, - IPOIB_NUM_WC = 4, + IPOIB_NUM_WC = 4, IPOIB_MAX_PATH_REC_QUEUE = 3, - IPOIB_MAX_MCAST_QUEUE = 3, - - IPOIB_FLAG_OPER_UP = 0, - IPOIB_FLAG_INITIALIZED = 1, - IPOIB_FLAG_ADMIN_UP = 2, - IPOIB_PKEY_ASSIGNED = 3, - IPOIB_PKEY_STOP = 4, - IPOIB_FLAG_SUBINTERFACE = 5, - IPOIB_MCAST_RUN = 6, - IPOIB_STOP_REAPER = 7, - IPOIB_MCAST_STARTED = 8, - IPOIB_FLAG_ADMIN_CM = 9, + IPOIB_MAX_MCAST_QUEUE = 3, + + IPOIB_FLAG_OPER_UP = 0, + IPOIB_FLAG_INITIALIZED = 1, + IPOIB_FLAG_ADMIN_UP = 2, + IPOIB_PKEY_ASSIGNED = 3, + IPOIB_PKEY_STOP = 4, + IPOIB_FLAG_SUBINTERFACE = 5, + IPOIB_MCAST_RUN = 6, + IPOIB_STOP_REAPER = 7, + IPOIB_MCAST_STARTED = 8, + IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, IPOIB_MAX_BACKOFF_SECONDS = 16, - IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */ + IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */ IPOIB_MCAST_FLAG_SENDONLY = 1, - IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ + IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_ATTACHED = 3, }; @@ -117,7 +118,7 @@ struct ipoib_pseudoheader { struct ipoib_mcast { struct ib_sa_mcmember_rec mcmember; struct ib_sa_multicast *mc; - struct ipoib_ah *ah; + struct ipoib_ah *ah; struct rb_node rb_node; struct list_head list; @@ -186,27 +187,29 @@ enum ipoib_cm_state { }; struct ipoib_cm_rx { - struct ib_cm_id *id; - struct ib_qp *qp; - struct list_head list; - struct net_device *dev; - unsigned long jiffies; - enum ipoib_cm_state state; + struct ib_cm_id *id; + struct ib_qp *qp; + struct ipoib_cm_rx_buf *rx_ring; + struct list_head list; + struct net_device *dev; + unsigned long jiffies; + enum ipoib_cm_state state; + int recv_count; }; struct ipoib_cm_tx { - struct ib_cm_id *id; - struct ib_qp *qp; + struct ib_cm_id *id; + struct ib_qp *qp; struct list_head list; struct net_device *dev; struct ipoib_neigh *neigh; struct ipoib_path *path; struct ipoib_tx_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; - unsigned long flags; - u32 mtu; - struct ib_wc ibwc[IPOIB_NUM_WC]; + unsigned tx_head; + unsigned tx_tail; + unsigned long flags; + u32 mtu; + struct ib_wc ibwc[IPOIB_NUM_WC]; }; struct ipoib_cm_rx_buf { @@ -215,25 +218,28 @@ struct ipoib_cm_rx_buf { }; struct ipoib_cm_dev_priv { - struct ib_srq *srq; + struct ib_srq *srq; struct ipoib_cm_rx_buf *srq_ring; - struct ib_cm_id *id; - struct list_head passive_ids; /* state: LIVE */ - struct list_head rx_error_list; /* state: ERROR */ - struct list_head rx_flush_list; /* state: FLUSH, drain not started */ - struct list_head rx_drain_list; /* state: FLUSH, drain started */ - struct list_head rx_reap_list; /* state: FLUSH, drain done */ + struct ib_cm_id *id; + struct list_head passive_ids; /* state: LIVE */ + struct list_head rx_error_list; /* state: ERROR */ + struct list_head rx_flush_list; /* state: FLUSH, drain not started */ + struct list_head rx_drain_list; /* state: FLUSH, drain started */ + struct list_head rx_reap_list; /* state: FLUSH, drain done */ struct work_struct start_task; struct work_struct reap_task; struct work_struct skb_task; struct work_struct rx_reap_task; struct delayed_work stale_task; struct sk_buff_head skb_queue; - struct list_head start_list; - struct list_head reap_list; - struct ib_wc ibwc[IPOIB_NUM_WC]; - struct ib_sge rx_sge[IPOIB_CM_RX_SG]; + struct list_head start_list; + struct list_head reap_list; + struct ib_wc ibwc[IPOIB_NUM_WC]; + struct ib_sge rx_sge[IPOIB_CM_RX_SG]; struct ib_recv_wr rx_wr; + int nonsrq_conn_qp; + int max_cm_mtu; + int num_frags; }; /* @@ -269,30 +275,30 @@ struct ipoib_dev_priv { struct work_struct pkey_event_task; struct ib_device *ca; - u8 port; - u16 pkey; - u16 pkey_index; - struct ib_pd *pd; - struct ib_mr *mr; - struct ib_cq *cq; - struct ib_qp *qp; - u32 qkey; + u8 port; + u16 pkey; + u16 pkey_index; + struct ib_pd *pd; + struct ib_mr *mr; + struct ib_cq *cq; + struct ib_qp *qp; + u32 qkey; union ib_gid local_gid; - u16 local_lid; + u16 local_lid; unsigned int admin_mtu; unsigned int mcast_mtu; struct ipoib_rx_buf *rx_ring; - spinlock_t tx_lock; + spinlock_t tx_lock; struct ipoib_tx_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; - struct ib_sge tx_sge; + unsigned tx_head; + unsigned tx_tail; + struct ib_sge tx_sge; struct ib_send_wr tx_wr; - unsigned tx_outstanding; + unsigned tx_outstanding; struct ib_wc ibwc[IPOIB_NUM_WC]; @@ -317,10 +323,10 @@ struct ipoib_dev_priv { struct ipoib_ah { struct net_device *dev; - struct ib_ah *ah; + struct ib_ah *ah; struct list_head list; - struct kref ref; - unsigned last_send; + struct kref ref; + unsigned last_send; }; struct ipoib_path { @@ -331,11 +337,11 @@ struct ipoib_path { struct list_head neigh_list; - int query_id; + int query_id; struct ib_sa_query *query; struct completion done; - struct rb_node rb_node; + struct rb_node rb_node; struct list_head list; }; @@ -344,7 +350,7 @@ struct ipoib_neigh { #ifdef CONFIG_INFINIBAND_IPOIB_CM struct ipoib_cm_tx *cm; #endif - union ib_gid dgid; + union ib_gid dgid; struct sk_buff_head queue; struct neighbour *neighbour; @@ -455,12 +461,14 @@ void ipoib_drain_cq(struct net_device *dev); #ifdef CONFIG_INFINIBAND_IPOIB_CM -#define IPOIB_FLAGS_RC 0x80 -#define IPOIB_FLAGS_UC 0x40 +#define IPOIB_FLAGS_RC 0x80 +#define IPOIB_FLAGS_UC 0x40 /* We don't support UC connections at the moment */ #define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC)) +extern int ipoib_max_conn_qp; + static inline int ipoib_cm_admin_enabled(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -491,6 +499,18 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t neigh->cm = tx; } +static inline int ipoib_cm_has_srq(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + return !!priv->cm.srq; +} + +static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + return priv->cm.max_cm_mtu; +} + void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx); int ipoib_cm_dev_open(struct net_device *dev); void ipoib_cm_dev_stop(struct net_device *dev); @@ -500,7 +520,7 @@ void ipoib_cm_dev_cleanup(struct net_device *dev); struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, struct ipoib_neigh *neigh); void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx); -void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, +void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, unsigned int mtu); void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc); void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc); @@ -508,6 +528,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc); struct ipoib_cm_tx; +#define ipoib_max_conn_qp 0 + static inline int ipoib_cm_admin_enabled(struct net_device *dev) { return 0; @@ -533,6 +555,16 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t { } +static inline int ipoib_cm_has_srq(struct net_device *dev) +{ + return 0; +} + +static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev) +{ + return 0; +} + static inline void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) { @@ -582,7 +614,7 @@ int ipoib_cm_add_mode_attr(struct net_device *dev) return 0; } -static inline void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, +static inline void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, unsigned int mtu) { dev_kfree_skb_any(skb); @@ -624,12 +656,12 @@ extern struct ib_sa_client ipoib_sa_client; extern int ipoib_debug_level; #define ipoib_dbg(priv, format, arg...) \ - do { \ + do { \ if (ipoib_debug_level > 0) \ ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ } while (0) #define ipoib_dbg_mcast(priv, format, arg...) \ - do { \ + do { \ if (mcast_debug_level > 0) \ ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ } while (0) @@ -642,7 +674,7 @@ extern int ipoib_debug_level; #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA #define ipoib_dbg_data(priv, format, arg...) \ - do { \ + do { \ if (data_debug_level > 0) \ ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ } while (0) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 059cf92b60a5..1818f958c250 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -39,6 +39,15 @@ #include <linux/icmpv6.h> #include <linux/delay.h> +#include "ipoib.h" + +int ipoib_max_conn_qp = 128; + +module_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444); +MODULE_PARM_DESC(max_nonsrq_conn_qp, + "Max number of connected-mode QPs per interface " + "(applied only if shared receive queue is not available)"); + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA static int data_debug_level; @@ -47,8 +56,6 @@ MODULE_PARM_DESC(cm_data_debug_level, "Enable data path debug tracing for connected mode if > 0"); #endif -#include "ipoib.h" - #define IPOIB_CM_IETF_ID 0x1000000000000000ULL #define IPOIB_CM_RX_UPDATE_TIME (256 * HZ) @@ -81,7 +88,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); } -static int ipoib_cm_post_receive(struct net_device *dev, int id) +static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_recv_wr *bad_wr; @@ -89,13 +96,13 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id) priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; - for (i = 0; i < IPOIB_CM_RX_SG; ++i) + for (i = 0; i < priv->cm.num_frags; ++i) priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); - ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, + ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, priv->cm.srq_ring[id].mapping); dev_kfree_skb_any(priv->cm.srq_ring[id].skb); priv->cm.srq_ring[id].skb = NULL; @@ -104,7 +111,33 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id) return ret; } -static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int frags, +static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, + struct ipoib_cm_rx *rx, int id) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_recv_wr *bad_wr; + int i, ret; + + priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; + + for (i = 0; i < IPOIB_CM_RX_SG; ++i) + priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i]; + + ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr); + if (unlikely(ret)) { + ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); + ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, + rx->rx_ring[id].mapping); + dev_kfree_skb_any(rx->rx_ring[id].skb); + rx->rx_ring[id].skb = NULL; + } + + return ret; +} + +static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, + struct ipoib_cm_rx_buf *rx_ring, + int id, int frags, u64 mapping[IPOIB_CM_RX_SG]) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -141,7 +174,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int goto partial_error; } - priv->cm.srq_ring[id].skb = skb; + rx_ring[id].skb = skb; return skb; partial_error: @@ -155,7 +188,23 @@ partial_error: return NULL; } -static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv) +static void ipoib_cm_free_rx_ring(struct net_device *dev, + struct ipoib_cm_rx_buf *rx_ring) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < ipoib_recvq_size; ++i) + if (rx_ring[i].skb) { + ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, + rx_ring[i].mapping); + dev_kfree_skb_any(rx_ring[i].skb); + } + + kfree(rx_ring); +} + +static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) { struct ib_send_wr *bad_wr; struct ipoib_cm_rx *p; @@ -208,12 +257,18 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, .qp_type = IB_QPT_RC, .qp_context = p, }; + + if (!ipoib_cm_has_srq(dev)) { + attr.cap.max_recv_wr = ipoib_recvq_size; + attr.cap.max_recv_sge = IPOIB_CM_RX_SG; + } + return ib_create_qp(priv->pd, &attr); } static int ipoib_cm_modify_rx_qp(struct net_device *dev, - struct ib_cm_id *cm_id, struct ib_qp *qp, - unsigned psn) + struct ib_cm_id *cm_id, struct ib_qp *qp, + unsigned psn) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_attr qp_attr; @@ -266,6 +321,60 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev, return 0; } +static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, + struct ipoib_cm_rx *rx) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int ret; + int i; + + rx->rx_ring = kcalloc(ipoib_recvq_size, sizeof *rx->rx_ring, GFP_KERNEL); + if (!rx->rx_ring) + return -ENOMEM; + + spin_lock_irq(&priv->lock); + + if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { + spin_unlock_irq(&priv->lock); + ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0); + ret = -EINVAL; + goto err_free; + } else + ++priv->cm.nonsrq_conn_qp; + + spin_unlock_irq(&priv->lock); + + for (i = 0; i < ipoib_recvq_size; ++i) { + if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1, + rx->rx_ring[i].mapping)) { + ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); + ret = -ENOMEM; + goto err_count; + } + ret = ipoib_cm_post_receive_nonsrq(dev, rx, i); + if (ret) { + ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " + "failed for buf %d\n", i); + ret = -EIO; + goto err_count; + } + } + + rx->recv_count = ipoib_recvq_size; + + return 0; + +err_count: + spin_lock_irq(&priv->lock); + --priv->cm.nonsrq_conn_qp; + spin_unlock_irq(&priv->lock); + +err_free: + ipoib_cm_free_rx_ring(dev, rx->rx_ring); + + return ret; +} + static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp, struct ib_cm_req_event_param *req, unsigned psn) @@ -281,7 +390,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, rep.private_data_len = sizeof data; rep.flow_control = 0; rep.rnr_retry_count = req->rnr_retry_count; - rep.srq = 1; + rep.srq = ipoib_cm_has_srq(dev); rep.qp_num = qp->qp_num; rep.starting_psn = psn; return ib_send_cm_rep(cm_id, &rep); @@ -317,6 +426,12 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even if (ret) goto err_modify; + if (!ipoib_cm_has_srq(dev)) { + ret = ipoib_cm_nonsrq_init_rx(dev, cm_id, p); + if (ret) + goto err_modify; + } + spin_lock_irq(&priv->lock); queue_delayed_work(ipoib_workqueue, &priv->cm.stale_task, IPOIB_CM_RX_DELAY); @@ -401,12 +516,14 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_rx_buf *rx_ring; unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); struct sk_buff *skb, *newskb; struct ipoib_cm_rx *p; unsigned long flags; u64 mapping[IPOIB_CM_RX_SG]; int frags; + int has_srq; ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -424,18 +541,32 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) return; } - skb = priv->cm.srq_ring[wr_id].skb; + p = wc->qp->qp_context; + + has_srq = ipoib_cm_has_srq(dev); + rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring; + + skb = rx_ring[wr_id].skb; if (unlikely(wc->status != IB_WC_SUCCESS)) { ipoib_dbg(priv, "cm recv error " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); ++dev->stats.rx_dropped; - goto repost; + if (has_srq) + goto repost; + else { + if (!--p->recv_count) { + spin_lock_irqsave(&priv->lock, flags); + list_move(&p->list, &priv->cm.rx_reap_list); + spin_unlock_irqrestore(&priv->lock, flags); + queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); + } + return; + } } if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) { - p = wc->qp->qp_context; if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { spin_lock_irqsave(&priv->lock, flags); p->jiffies = jiffies; @@ -450,7 +581,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; - newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping); + newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping); if (unlikely(!newskb)) { /* * If we can't allocate a new RX buffer, dump @@ -461,8 +592,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) goto repost; } - ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping); - memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); + ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping); + memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); @@ -483,9 +614,17 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) netif_receive_skb(skb); repost: - if (unlikely(ipoib_cm_post_receive(dev, wr_id))) - ipoib_warn(priv, "ipoib_cm_post_receive failed " - "for buf %d\n", wr_id); + if (has_srq) { + if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id))) + ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " + "for buf %d\n", wr_id); + } else { + if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) { + --p->recv_count; + ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " + "for buf %d\n", wr_id); + } + } } static inline int post_send(struct ipoib_dev_priv *priv, @@ -495,10 +634,10 @@ static inline int post_send(struct ipoib_dev_priv *priv, { struct ib_send_wr *bad_wr; - priv->tx_sge.addr = addr; - priv->tx_sge.length = len; + priv->tx_sge.addr = addr; + priv->tx_sge.length = len; - priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; + priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); } @@ -540,7 +679,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ tx_req->mapping = addr; if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), - addr, skb->len))) { + addr, skb->len))) { ipoib_warn(priv, "post_send failed\n"); ++dev->stats.tx_errors; ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); @@ -657,10 +796,33 @@ err_cm: return ret; } +static void ipoib_cm_free_rx_reap_list(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_cm_rx *rx, *n; + LIST_HEAD(list); + + spin_lock_irq(&priv->lock); + list_splice_init(&priv->cm.rx_reap_list, &list); + spin_unlock_irq(&priv->lock); + + list_for_each_entry_safe(rx, n, &list, list) { + ib_destroy_cm_id(rx->id); + ib_destroy_qp(rx->qp); + if (!ipoib_cm_has_srq(dev)) { + ipoib_cm_free_rx_ring(priv->dev, rx->rx_ring); + spin_lock_irq(&priv->lock); + --priv->cm.nonsrq_conn_qp; + spin_unlock_irq(&priv->lock); + } + kfree(rx); + } +} + void ipoib_cm_dev_stop(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_cm_rx *p, *n; + struct ipoib_cm_rx *p; unsigned long begin; LIST_HEAD(list); int ret; @@ -706,15 +868,9 @@ void ipoib_cm_dev_stop(struct net_device *dev) spin_lock_irq(&priv->lock); } - list_splice_init(&priv->cm.rx_reap_list, &list); - spin_unlock_irq(&priv->lock); - list_for_each_entry_safe(p, n, &list, list) { - ib_destroy_cm_id(p->id); - ib_destroy_qp(p->qp); - kfree(p); - } + ipoib_cm_free_rx_reap_list(dev); cancel_delayed_work(&priv->cm.stale_task); } @@ -799,7 +955,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_RC, .qp_context = tx - }; + }; return ib_create_qp(priv->pd, &attr); } @@ -816,28 +972,28 @@ static int ipoib_cm_send_req(struct net_device *dev, data.qpn = cpu_to_be32(priv->qp->qp_num); data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); - req.primary_path = pathrec; - req.alternate_path = NULL; - req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn); - req.qp_num = qp->qp_num; - req.qp_type = qp->qp_type; - req.private_data = &data; - req.private_data_len = sizeof data; - req.flow_control = 0; + req.primary_path = pathrec; + req.alternate_path = NULL; + req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn); + req.qp_num = qp->qp_num; + req.qp_type = qp->qp_type; + req.private_data = &data; + req.private_data_len = sizeof data; + req.flow_control = 0; - req.starting_psn = 0; /* FIXME */ + req.starting_psn = 0; /* FIXME */ /* * Pick some arbitrary defaults here; we could make these * module parameters if anyone cared about setting them. */ - req.responder_resources = 4; - req.remote_cm_response_timeout = 20; - req.local_cm_response_timeout = 20; - req.retry_count = 0; /* RFC draft warns against retries */ - req.rnr_retry_count = 0; /* RFC draft warns against retries */ - req.max_cm_retries = 15; - req.srq = 1; + req.responder_resources = 4; + req.remote_cm_response_timeout = 20; + req.local_cm_response_timeout = 20; + req.retry_count = 0; /* RFC draft warns against retries */ + req.rnr_retry_count = 0; /* RFC draft warns against retries */ + req.max_cm_retries = 15; + req.srq = ipoib_cm_has_srq(dev); return ib_send_cm_req(id, &req); } @@ -1150,7 +1306,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work) spin_unlock_irq(&priv->tx_lock); } -void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, +void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, unsigned int mtu) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -1166,20 +1322,8 @@ void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, static void ipoib_cm_rx_reap(struct work_struct *work) { - struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, - cm.rx_reap_task); - struct ipoib_cm_rx *p, *n; - LIST_HEAD(list); - - spin_lock_irq(&priv->lock); - list_splice_init(&priv->cm.rx_reap_list, &list); - spin_unlock_irq(&priv->lock); - - list_for_each_entry_safe(p, n, &list, list) { - ib_destroy_cm_id(p->id); - ib_destroy_qp(p->qp); - kfree(p); - } + ipoib_cm_free_rx_reap_list(container_of(work, struct ipoib_dev_priv, + cm.rx_reap_task)->dev); } static void ipoib_cm_stale_task(struct work_struct *work) @@ -1212,7 +1356,7 @@ static void ipoib_cm_stale_task(struct work_struct *work) } -static ssize_t show_mode(struct device *d, struct device_attribute *attr, +static ssize_t show_mode(struct device *d, struct device_attribute *attr, char *buf) { struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d)); @@ -1255,16 +1399,40 @@ int ipoib_cm_add_mode_attr(struct net_device *dev) return device_create_file(&dev->dev, &dev_attr_mode); } -int ipoib_cm_dev_init(struct net_device *dev) +static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_srq_init_attr srq_init_attr = { .attr = { .max_wr = ipoib_recvq_size, - .max_sge = IPOIB_CM_RX_SG + .max_sge = max_sge } }; - int ret, i; + + priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); + if (IS_ERR(priv->cm.srq)) { + if (PTR_ERR(priv->cm.srq) != -ENOSYS) + printk(KERN_WARNING "%s: failed to allocate SRQ, error %ld\n", + priv->ca->name, PTR_ERR(priv->cm.srq)); + priv->cm.srq = NULL; + return; + } + + priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, + GFP_KERNEL); + if (!priv->cm.srq_ring) { + printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n", + priv->ca->name, ipoib_recvq_size); + ib_destroy_srq(priv->cm.srq); + priv->cm.srq = NULL; + } +} + +int ipoib_cm_dev_init(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int i, ret; + struct ib_device_attr attr; INIT_LIST_HEAD(&priv->cm.passive_ids); INIT_LIST_HEAD(&priv->cm.reap_list); @@ -1281,43 +1449,53 @@ int ipoib_cm_dev_init(struct net_device *dev) skb_queue_head_init(&priv->cm.skb_queue); - priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); - if (IS_ERR(priv->cm.srq)) { - ret = PTR_ERR(priv->cm.srq); - priv->cm.srq = NULL; + ret = ib_query_device(priv->ca, &attr); + if (ret) { + printk(KERN_WARNING "ib_query_device() failed with %d\n", ret); return ret; } - priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, - GFP_KERNEL); - if (!priv->cm.srq_ring) { - printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n", - priv->ca->name, ipoib_recvq_size); - ipoib_cm_dev_cleanup(dev); - return -ENOMEM; + ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge); + + attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge); + ipoib_cm_create_srq(dev, attr.max_srq_sge); + if (ipoib_cm_has_srq(dev)) { + priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10; + priv->cm.num_frags = attr.max_srq_sge; + ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n", + priv->cm.max_cm_mtu, priv->cm.num_frags); + } else { + priv->cm.max_cm_mtu = IPOIB_CM_MTU; + priv->cm.num_frags = IPOIB_CM_RX_SG; } - for (i = 0; i < IPOIB_CM_RX_SG; ++i) + for (i = 0; i < priv->cm.num_frags; ++i) priv->cm.rx_sge[i].lkey = priv->mr->lkey; priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; - for (i = 1; i < IPOIB_CM_RX_SG; ++i) + for (i = 1; i < priv->cm.num_frags; ++i) priv->cm.rx_sge[i].length = PAGE_SIZE; priv->cm.rx_wr.next = NULL; priv->cm.rx_wr.sg_list = priv->cm.rx_sge; - priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG; + priv->cm.rx_wr.num_sge = priv->cm.num_frags; + + if (ipoib_cm_has_srq(dev)) { + for (i = 0; i < ipoib_recvq_size; ++i) { + if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i, + priv->cm.num_frags - 1, + priv->cm.srq_ring[i].mapping)) { + ipoib_warn(priv, "failed to allocate " + "receive buffer %d\n", i); + ipoib_cm_dev_cleanup(dev); + return -ENOMEM; + } - for (i = 0; i < ipoib_recvq_size; ++i) { - if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1, - priv->cm.srq_ring[i].mapping)) { - ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); - ipoib_cm_dev_cleanup(dev); - return -ENOMEM; - } - if (ipoib_cm_post_receive(dev, i)) { - ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); - ipoib_cm_dev_cleanup(dev); - return -EIO; + if (ipoib_cm_post_receive_srq(dev, i)) { + ipoib_warn(priv, "ipoib_cm_post_receive_srq " + "failed for buf %d\n", i); + ipoib_cm_dev_cleanup(dev); + return -EIO; + } } } @@ -1328,7 +1506,7 @@ int ipoib_cm_dev_init(struct net_device *dev) void ipoib_cm_dev_cleanup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - int i, ret; + int ret; if (!priv->cm.srq) return; @@ -1342,13 +1520,7 @@ void ipoib_cm_dev_cleanup(struct net_device *dev) priv->cm.srq = NULL; if (!priv->cm.srq_ring) return; - for (i = 0; i < ipoib_recvq_size; ++i) - if (priv->cm.srq_ring[i].skb) { - ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, - priv->cm.srq_ring[i].mapping); - dev_kfree_skb_any(priv->cm.srq_ring[i].skb); - priv->cm.srq_ring[i].skb = NULL; - } - kfree(priv->cm.srq_ring); + + ipoib_cm_free_rx_ring(dev, priv->cm.srq_ring); priv->cm.srq_ring = NULL; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 44c174182a82..8b882bbd1d05 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -124,7 +124,7 @@ static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr) return 0; } -static struct seq_operations ipoib_mcg_seq_ops = { +static const struct seq_operations ipoib_mcg_seq_ops = { .start = ipoib_mcg_seq_start, .next = ipoib_mcg_seq_next, .stop = ipoib_mcg_seq_stop, @@ -230,7 +230,7 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr) return 0; } -static struct seq_operations ipoib_path_seq_ops = { +static const struct seq_operations ipoib_path_seq_ops = { .start = ipoib_path_seq_start, .next = ipoib_path_seq_next, .stop = ipoib_path_seq_stop, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5063dd509ad2..52bc2bd5799a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -345,12 +345,12 @@ static inline int post_send(struct ipoib_dev_priv *priv, { struct ib_send_wr *bad_wr; - priv->tx_sge.addr = addr; - priv->tx_sge.length = len; + priv->tx_sge.addr = addr; + priv->tx_sge.length = len; - priv->tx_wr.wr_id = wr_id; + priv->tx_wr.wr_id = wr_id; priv->tx_wr.wr.ud.remote_qpn = qpn; - priv->tx_wr.wr.ud.ah = address; + priv->tx_wr.wr.ud.ah = address; return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a03a65ebcf0c..09f5371137a1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -182,17 +182,20 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) struct ipoib_dev_priv *priv = netdev_priv(dev); /* dev->mtu > 2K ==> connected mode */ - if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) { + if (ipoib_cm_admin_enabled(dev)) { + if (new_mtu > ipoib_cm_max_mtu(dev)) + return -EINVAL; + if (new_mtu > priv->mcast_mtu) ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", priv->mcast_mtu); + dev->mtu = new_mtu; return 0; } - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) { + if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) return -EINVAL; - } priv->admin_mtu = new_mtu; @@ -460,6 +463,9 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; + if (!priv->broadcast) + return NULL; + path = kzalloc(sizeof *path, GFP_ATOMIC); if (!path) return NULL; @@ -471,8 +477,8 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) INIT_LIST_HEAD(&path->neigh_list); memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid)); - path->pathrec.sgid = priv->local_gid; - path->pathrec.pkey = cpu_to_be16(priv->pkey); + path->pathrec.sgid = priv->local_gid; + path->pathrec.pkey = cpu_to_be16(priv->pkey); path->pathrec.numb_path = 1; path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class; @@ -666,16 +672,6 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags))) return NETDEV_TX_LOCKED; - /* - * Check if our queue is stopped. Since we have the LLTX bit - * set, we can't rely on netif_stop_queue() preventing our - * xmit function from being called with a full queue. - */ - if (unlikely(netif_queue_stopped(dev))) { - spin_unlock_irqrestore(&priv->tx_lock, flags); - return NETDEV_TX_BUSY; - } - if (likely(skb->dst && skb->dst->neighbour)) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { ipoib_path_lookup(skb, dev); @@ -684,12 +680,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) neigh = *to_ipoib_neigh(skb->dst->neighbour); - if (ipoib_cm_get(neigh)) { - if (ipoib_cm_up(neigh)) { - ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); - goto out; - } - } else if (neigh->ah) { + if (neigh->ah) if (unlikely((memcmp(&neigh->dgid.raw, skb->dst->neighbour->ha + 4, sizeof(union ib_gid))) || @@ -710,6 +701,12 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } + if (ipoib_cm_get(neigh)) { + if (ipoib_cm_up(neigh)) { + ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); + goto out; + } + } else if (neigh->ah) { ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha)); goto out; } @@ -817,11 +814,9 @@ static void ipoib_neigh_cleanup(struct neighbour *n) struct ipoib_ah *ah = NULL; neigh = *to_ipoib_neigh(n); - if (neigh) { + if (neigh) priv = netdev_priv(neigh->dev); - ipoib_dbg(priv, "neigh_destructor for bonding device: %s\n", - n->dev->name); - } else + else return; ipoib_dbg(priv, "neigh_cleanup for %06x " IPOIB_GID_FMT "\n", @@ -947,34 +942,34 @@ static void ipoib_setup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - dev->open = ipoib_open; - dev->stop = ipoib_stop; - dev->change_mtu = ipoib_change_mtu; - dev->hard_start_xmit = ipoib_start_xmit; - dev->tx_timeout = ipoib_timeout; - dev->header_ops = &ipoib_header_ops; - dev->set_multicast_list = ipoib_set_mcast_list; - dev->neigh_setup = ipoib_neigh_setup_dev; + dev->open = ipoib_open; + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; + dev->tx_timeout = ipoib_timeout; + dev->header_ops = &ipoib_header_ops; + dev->set_multicast_list = ipoib_set_mcast_list; + dev->neigh_setup = ipoib_neigh_setup_dev; netif_napi_add(dev, &priv->napi, ipoib_poll, 100); - dev->watchdog_timeo = HZ; + dev->watchdog_timeo = HZ; - dev->flags |= IFF_BROADCAST | IFF_MULTICAST; + dev->flags |= IFF_BROADCAST | IFF_MULTICAST; /* * We add in INFINIBAND_ALEN to allow for the destination * address "pseudoheader" for skbs without neighbour struct. */ - dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; - dev->addr_len = INFINIBAND_ALEN; - dev->type = ARPHRD_INFINIBAND; - dev->tx_queue_len = ipoib_sendq_size * 2; - dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; + dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; + dev->addr_len = INFINIBAND_ALEN; + dev->type = ARPHRD_INFINIBAND; + dev->tx_queue_len = ipoib_sendq_size * 2; + dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; /* MTU will be reset when mcast join happens */ - dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; - priv->mcast_mtu = priv->admin_mtu = dev->mtu; + dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; + priv->mcast_mtu = priv->admin_mtu = dev->mtu; memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); @@ -1265,6 +1260,9 @@ static int __init ipoib_init_module(void) ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE); +#ifdef CONFIG_INFINIBAND_IPOIB_CM + ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); +#endif ret = ipoib_register_debugfs(); if (ret) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 9bcfc7ad6aa6..2628339e3a99 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -702,7 +702,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) out: if (mcast && mcast->ah) { - if (skb->dst && + if (skb->dst && skb->dst->neighbour && !*to_ipoib_neigh(skb->dst->neighbour)) { struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour, @@ -710,7 +710,7 @@ out: if (neigh) { kref_get(&mcast->ah->ref); - neigh->ah = mcast->ah; + neigh->ah = mcast->ah; list_add_tail(&neigh->list, &mcast->neigh_list); } } @@ -788,10 +788,6 @@ void ipoib_mcast_restart_task(struct work_struct *work) memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); - /* Add in the P_Key */ - mgid.raw[4] = (priv->pkey >> 8) & 0xff; - mgid.raw[5] = priv->pkey & 0xff; - mcast = __ipoib_mcast_find(dev, &mgid); if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { struct ipoib_mcast *nmcast; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 3c6e45db0ab5..433e99ac227b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -172,8 +172,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) size = ipoib_sendq_size + ipoib_recvq_size + 1; ret = ipoib_cm_dev_init(dev); - if (!ret) - size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */; + if (!ret) { + if (ipoib_cm_has_srq(dev)) + size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */ + else + size += ipoib_recvq_size * ipoib_max_conn_qp; + } priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); if (IS_ERR(priv->cq)) { @@ -197,12 +201,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff; - priv->tx_sge.lkey = priv->mr->lkey; + priv->tx_sge.lkey = priv->mr->lkey; - priv->tx_wr.opcode = IB_WR_SEND; - priv->tx_wr.sg_list = &priv->tx_sge; - priv->tx_wr.num_sge = 1; - priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->tx_wr.opcode = IB_WR_SEND; + priv->tx_wr.sg_list = &priv->tx_sge; + priv->tx_wr.num_sge = 1; + priv->tx_wr.send_flags = IB_SEND_SIGNALED; return 0; diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig index fe604c8d2996..77dedba829e6 100644 --- a/drivers/infiniband/ulp/iser/Kconfig +++ b/drivers/infiniband/ulp/iser/Kconfig @@ -8,5 +8,5 @@ config INFINIBAND_ISER that speak iSCSI over iSER over InfiniBand. The iSER protocol is defined by IETF. - See <http://www.ietf.org/internet-drafts/draft-ietf-ips-iser-05.txt> - and <http://www.infinibandta.org/members/spec/iser_annex_060418.pdf> + See <http://www.ietf.org/rfc/rfc5046.txt> + and <http://www.infinibandta.org/members/spec/Annex_iSER.PDF> diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index bad8dacafd10..be1b9fbd416d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -129,7 +129,7 @@ error: * iscsi_iser_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands * **/ -static void +static int iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) { struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data; @@ -138,6 +138,7 @@ iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) iser_ctask->command_sent = 0; iser_ctask->iser_conn = iser_conn; iser_ctask_rdma_init(iser_ctask); + return 0; } /** @@ -220,12 +221,6 @@ iscsi_iser_ctask_xmit(struct iscsi_conn *conn, debug_scsi("ctask deq [cid %d itt 0x%x]\n", conn->id, ctask->itt); - /* - * serialize with TMF AbortTask - */ - if (ctask->mtask) - return error; - /* Send the cmd PDU */ if (!iser_ctask->command_sent) { error = iser_send_command(conn, ctask); @@ -406,6 +401,7 @@ iscsi_iser_session_create(struct iscsi_transport *iscsit, ctask = session->cmds[i]; iser_ctask = ctask->dd_data; ctask->hdr = (struct iscsi_cmd *)&iser_ctask->desc.iscsi_header; + ctask->hdr_max = sizeof(iser_ctask->desc.iscsi_header); } for (i = 0; i < session->mgmtpool_max; i++) { @@ -551,11 +547,13 @@ static struct scsi_host_template iscsi_iser_sht = { .module = THIS_MODULE, .name = "iSCSI Initiator over iSER, v." DRV_VER, .queuecommand = iscsi_queuecommand, + .change_queue_depth = iscsi_change_queue_depth, .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1, .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, .max_sectors = 1024, .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN, .eh_abort_handler = iscsi_eh_abort, + .eh_device_reset_handler= iscsi_eh_device_reset, .eh_host_reset_handler = iscsi_eh_host_reset, .use_clustering = DISABLE_CLUSTERING, .proc_name = "iscsi_iser", @@ -582,7 +580,9 @@ static struct iscsi_transport iscsi_iser_transport = { ISCSI_PERSISTENT_ADDRESS | ISCSI_TARGET_NAME | ISCSI_TPGT | ISCSI_USERNAME | ISCSI_PASSWORD | - ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN, + ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN | + ISCSI_FAST_ABORT | ISCSI_ABORT_TMO | + ISCSI_PING_TMO | ISCSI_RECV_TMO, .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_NETDEV_NAME | ISCSI_HOST_INITIATOR_NAME, diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index a6f2303ed14a..83247f1fdf72 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -561,7 +561,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc, if (opcode == ISCSI_OP_SCSI_CMD_RSP) { itt = get_itt(hdr->itt); /* mask out cid and age bits */ if (!(itt < session->cmds_max)) - iser_err("itt can't be matched to task!!!" + iser_err("itt can't be matched to task!!! " "conn %p opcode %d cmds_max %d itt %d\n", conn->iscsi_conn,opcode,session->cmds_max,itt); /* use the mapping given with the cmds array indexed by itt */ @@ -621,9 +621,7 @@ void iser_snd_completion(struct iser_desc *tx_desc) struct iscsi_session *session = conn->session; spin_lock(&conn->session->lock); - list_del(&mtask->running); - __kfifo_put(session->mgmtpool.queue, (void*)&mtask, - sizeof(void*)); + iscsi_free_mgmt_task(conn, mtask); spin_unlock(&session->lock); } } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d68798061795..4a17743a639f 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -310,13 +310,15 @@ static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data, if (i + 1 < data->dma_nents) { next_addr = ib_sg_dma_address(ibdev, sg_next(sg)); /* are i, i+1 fragments of the same page? */ - if (end_addr == next_addr) + if (end_addr == next_addr) { + cnt++; continue; - else if (!IS_4K_ALIGNED(end_addr)) { + } else if (!IS_4K_ALIGNED(end_addr)) { ret_len = cnt + 1; break; } } + cnt++; } if (i == data->dma_nents) ret_len = cnt; /* loop ended */ diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 654a4dce0236..714b8db02b29 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -105,7 +105,7 @@ pd_err: } /** - * iser_free_device_ib_res - destory/dealloc/dereg the DMA MR, + * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR, * CQ and PD created with the device associated with the adapator. */ static void iser_free_device_ib_res(struct iser_device *device) @@ -475,13 +475,11 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve iser_disconnected_handler(cma_id); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: + iser_err("Device removal is currently unsupported\n"); BUG(); break; - case RDMA_CM_EVENT_CONNECT_RESPONSE: - BUG(); - break; - case RDMA_CM_EVENT_CONNECT_REQUEST: default: + iser_err("Unexpected RDMA CM event (%d)\n", event->event); break; } return ret; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 950228fb009f..fd4a49fc4773 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -204,6 +204,22 @@ out: return ret; } +static int srp_new_cm_id(struct srp_target_port *target) +{ + struct ib_cm_id *new_cm_id; + + new_cm_id = ib_create_cm_id(target->srp_host->dev->dev, + srp_cm_handler, target); + if (IS_ERR(new_cm_id)) + return PTR_ERR(new_cm_id); + + if (target->cm_id) + ib_destroy_cm_id(target->cm_id); + target->cm_id = new_cm_id; + + return 0; +} + static int srp_create_target_ib(struct srp_target_port *target) { struct ib_qp_init_attr *init_attr; @@ -272,7 +288,8 @@ static void srp_path_rec_completion(int status, target->status = status; if (status) - printk(KERN_ERR PFX "Got failed path rec status %d\n", status); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Got failed path rec status %d\n", status); else target->path = *pathrec; complete(&target->done); @@ -303,7 +320,8 @@ static int srp_lookup_path(struct srp_target_port *target) wait_for_completion(&target->done); if (target->status < 0) - printk(KERN_WARNING PFX "Path record query failed\n"); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Path record query failed\n"); return target->status; } @@ -379,9 +397,10 @@ static int srp_send_req(struct srp_target_port *target) * the second 8 bytes to the local node GUID. */ if (srp_target_is_topspin(target)) { - printk(KERN_DEBUG PFX "Topspin/Cisco initiator port ID workaround " - "activated for target GUID %016llx\n", - (unsigned long long) be64_to_cpu(target->ioc_guid)); + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Topspin/Cisco initiator port ID workaround " + "activated for target GUID %016llx\n", + (unsigned long long) be64_to_cpu(target->ioc_guid)); memset(req->priv.initiator_port_id, 0, 8); memcpy(req->priv.initiator_port_id + 8, &target->srp_host->dev->dev->node_guid, 8); @@ -400,7 +419,8 @@ static void srp_disconnect_target(struct srp_target_port *target) init_completion(&target->done); if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { - printk(KERN_DEBUG PFX "Sending CM DREQ failed\n"); + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM DREQ failed\n"); return; } wait_for_completion(&target->done); @@ -432,6 +452,7 @@ static void srp_remove_work(struct work_struct *work) static int srp_connect_target(struct srp_target_port *target) { + int retries = 3; int ret; ret = srp_lookup_path(target); @@ -464,6 +485,21 @@ static int srp_connect_target(struct srp_target_port *target) case SRP_DLID_REDIRECT: break; + case SRP_STALE_CONN: + /* Our current CM id was stale, and is now in timewait. + * Try to reconnect with a new one. + */ + if (!retries-- || srp_new_cm_id(target)) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "giving up on stale connection\n"); + target->status = -ECONNRESET; + return target->status; + } + + shost_printk(KERN_ERR, target->scsi_host, PFX + "retrying stale connection\n"); + break; + default: return target->status; } @@ -503,7 +539,6 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re static int srp_reconnect_target(struct srp_target_port *target) { - struct ib_cm_id *new_cm_id; struct ib_qp_attr qp_attr; struct srp_request *req, *tmp; struct ib_wc wc; @@ -522,14 +557,9 @@ static int srp_reconnect_target(struct srp_target_port *target) * Now get a new local CM ID so that we avoid confusing the * target in case things are really fouled up. */ - new_cm_id = ib_create_cm_id(target->srp_host->dev->dev, - srp_cm_handler, target); - if (IS_ERR(new_cm_id)) { - ret = PTR_ERR(new_cm_id); + ret = srp_new_cm_id(target); + if (ret) goto err; - } - ib_destroy_cm_id(target->cm_id); - target->cm_id = new_cm_id; qp_attr.qp_state = IB_QPS_RESET; ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE); @@ -568,7 +598,8 @@ static int srp_reconnect_target(struct srp_target_port *target) return ret; err: - printk(KERN_ERR PFX "reconnect failed (%d), removing target port.\n", ret); + shost_printk(KERN_ERR, target->scsi_host, + PFX "reconnect failed (%d), removing target port.\n", ret); /* * We couldn't reconnect, so kill our target port off. @@ -683,8 +714,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, if (scmnd->sc_data_direction != DMA_FROM_DEVICE && scmnd->sc_data_direction != DMA_TO_DEVICE) { - printk(KERN_WARNING PFX "Unhandled data direction %d\n", - scmnd->sc_data_direction); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled data direction %d\n", + scmnd->sc_data_direction); return -EINVAL; } @@ -786,8 +818,9 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) } else { scmnd = req->scmnd; if (!scmnd) - printk(KERN_ERR "Null scmnd for RSP w/tag %016llx\n", - (unsigned long long) rsp->tag); + shost_printk(KERN_ERR, target->scsi_host, + "Null scmnd for RSP w/tag %016llx\n", + (unsigned long long) rsp->tag); scmnd->result = rsp->status; if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { @@ -831,7 +864,8 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) if (0) { int i; - printk(KERN_ERR PFX "recv completion, opcode 0x%02x\n", opcode); + shost_printk(KERN_ERR, target->scsi_host, + PFX "recv completion, opcode 0x%02x\n", opcode); for (i = 0; i < wc->byte_len; ++i) { if (i % 8 == 0) @@ -852,11 +886,13 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) case SRP_T_LOGOUT: /* XXX Handle target logout */ - printk(KERN_WARNING PFX "Got target logout request\n"); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Got target logout request\n"); break; default: - printk(KERN_WARNING PFX "Unhandled SRP opcode 0x%02x\n", opcode); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled SRP opcode 0x%02x\n", opcode); break; } @@ -872,9 +908,10 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr) ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); while (ib_poll_cq(cq, 1, &wc) > 0) { if (wc.status) { - printk(KERN_ERR PFX "failed %s status %d\n", - wc.wr_id & SRP_OP_RECV ? "receive" : "send", - wc.status); + shost_printk(KERN_ERR, target->scsi_host, + PFX "failed %s status %d\n", + wc.wr_id & SRP_OP_RECV ? "receive" : "send", + wc.status); target->qp_in_error = 1; break; } @@ -930,13 +967,18 @@ static int srp_post_recv(struct srp_target_port *target) * req_lim and tx_head. Lock cannot be dropped between call here and * call to __srp_post_send(). */ -static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target) +static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target, + enum srp_request_type req_type) { + s32 min = (req_type == SRP_REQ_TASK_MGMT) ? 1 : 2; + if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE) return NULL; - if (unlikely(target->req_lim < 1)) + if (target->req_lim < min) { ++target->zero_req_lim; + return NULL; + } return target->tx_ring[target->tx_head & SRP_SQ_SIZE]; } @@ -993,7 +1035,7 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, return 0; } - iu = __srp_get_tx_iu(target); + iu = __srp_get_tx_iu(target, SRP_REQ_NORMAL); if (!iu) goto err; @@ -1022,12 +1064,13 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, len = srp_map_data(scmnd, target, req); if (len < 0) { - printk(KERN_ERR PFX "Failed to map data\n"); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Failed to map data\n"); goto err; } if (__srp_post_recv(target)) { - printk(KERN_ERR PFX "Recv failed\n"); + shost_printk(KERN_ERR, target->scsi_host, PFX "Recv failed\n"); goto err_unmap; } @@ -1035,7 +1078,7 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, DMA_TO_DEVICE); if (__srp_post_send(target, iu, len)) { - printk(KERN_ERR PFX "Send failed\n"); + shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); goto err_unmap; } @@ -1090,6 +1133,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event, struct srp_target_port *target) { + struct Scsi_Host *shost = target->scsi_host; struct ib_class_port_info *cpi; int opcode; @@ -1115,19 +1159,22 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, memcpy(target->path.dgid.raw, event->param.rej_rcvd.ari, 16); - printk(KERN_DEBUG PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", - (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix), - (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id)); + shost_printk(KERN_DEBUG, shost, + PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", + (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix), + (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id)); target->status = SRP_PORT_REDIRECT; } else { - printk(KERN_WARNING " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); target->status = -ECONNRESET; } break; case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: - printk(KERN_WARNING " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); target->status = -ECONNRESET; break; @@ -1138,20 +1185,26 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, u32 reason = be32_to_cpu(rej->reason); if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) - printk(KERN_WARNING PFX - "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); + shost_printk(KERN_WARNING, shost, + PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); else - printk(KERN_WARNING PFX - "SRP LOGIN REJECTED, reason 0x%08x\n", reason); + shost_printk(KERN_WARNING, shost, + PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); } else - printk(KERN_WARNING " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," - " opcode 0x%02x\n", opcode); + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," + " opcode 0x%02x\n", opcode); target->status = -ECONNRESET; break; + case IB_CM_REJ_STALE_CONN: + shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n"); + target->status = SRP_STALE_CONN; + break; + default: - printk(KERN_WARNING " REJ reason 0x%x\n", - event->param.rej_rcvd.reason); + shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", + event->param.rej_rcvd.reason); target->status = -ECONNRESET; } } @@ -1166,7 +1219,8 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) switch (event->event) { case IB_CM_REQ_ERROR: - printk(KERN_DEBUG PFX "Sending CM REQ failed\n"); + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM REQ failed\n"); comp = 1; target->status = -ECONNRESET; break; @@ -1184,7 +1238,8 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) target->scsi_host->can_queue = min(target->req_lim, target->scsi_host->can_queue); } else { - printk(KERN_WARNING PFX "Unhandled RSP opcode %#x\n", opcode); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled RSP opcode %#x\n", opcode); target->status = -ECONNRESET; break; } @@ -1230,20 +1285,23 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) break; case IB_CM_REJ_RECEIVED: - printk(KERN_DEBUG PFX "REJ received\n"); + shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); comp = 1; srp_cm_rej_handler(cm_id, event, target); break; case IB_CM_DREQ_RECEIVED: - printk(KERN_WARNING PFX "DREQ received - connection closed\n"); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "DREQ received - connection closed\n"); if (ib_send_cm_drep(cm_id, NULL, 0)) - printk(KERN_ERR PFX "Sending CM DREP failed\n"); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Sending CM DREP failed\n"); break; case IB_CM_TIMEWAIT_EXIT: - printk(KERN_ERR PFX "connection closed\n"); + shost_printk(KERN_ERR, target->scsi_host, + PFX "connection closed\n"); comp = 1; target->status = 0; @@ -1255,7 +1313,8 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) break; default: - printk(KERN_WARNING PFX "Unhandled CM event %d\n", event->event); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled CM event %d\n", event->event); break; } @@ -1283,7 +1342,7 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target, init_completion(&req->done); - iu = __srp_get_tx_iu(target); + iu = __srp_get_tx_iu(target, SRP_REQ_TASK_MGMT); if (!iu) goto out; @@ -1332,7 +1391,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) struct srp_request *req; int ret = SUCCESS; - printk(KERN_ERR "SRP abort called\n"); + shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); if (target->qp_in_error) return FAILED; @@ -1362,7 +1421,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_request *req, *tmp; - printk(KERN_ERR "SRP reset_device called\n"); + shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); if (target->qp_in_error) return FAILED; @@ -1389,7 +1448,7 @@ static int srp_reset_host(struct scsi_cmnd *scmnd) struct srp_target_port *target = host_to_target(scmnd->device->host); int ret = FAILED; - printk(KERN_ERR PFX "SRP reset_host called\n"); + shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); if (!srp_reconnect_target(target)) ret = SUCCESS; @@ -1814,8 +1873,9 @@ static ssize_t srp_create_target(struct class_device *class_dev, ib_get_cached_gid(host->dev->dev, host->port, 0, &target->path.sgid); - printk(KERN_DEBUG PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " - "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + shost_printk(KERN_DEBUG, target->scsi_host, PFX + "new target: id_ext %016llx ioc_guid %016llx pkey %04x " + "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", (unsigned long long) be64_to_cpu(target->id_ext), (unsigned long long) be64_to_cpu(target->ioc_guid), be16_to_cpu(target->path.pkey), @@ -1833,16 +1893,15 @@ static ssize_t srp_create_target(struct class_device *class_dev, if (ret) goto err; - target->cm_id = ib_create_cm_id(host->dev->dev, srp_cm_handler, target); - if (IS_ERR(target->cm_id)) { - ret = PTR_ERR(target->cm_id); + ret = srp_new_cm_id(target); + if (ret) goto err_free; - } target->qp_in_error = 0; ret = srp_connect_target(target); if (ret) { - printk(KERN_ERR PFX "Connection failed\n"); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Connection failed\n"); goto err_cm_id; } @@ -2053,6 +2112,7 @@ static void srp_remove_one(struct ib_device *device) list_for_each_entry_safe(target, tmp_target, &host->target_list, list) { + srp_remove_host(target->scsi_host); scsi_remove_host(target->scsi_host); srp_disconnect_target(target); ib_destroy_cm_id(target->cm_id); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index e3573e7038c4..cb6eb816024a 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -54,6 +54,7 @@ enum { SRP_PORT_REDIRECT = 1, SRP_DLID_REDIRECT = 2, + SRP_STALE_CONN = 3, SRP_MAX_LUN = 512, SRP_DEF_SG_TABLESIZE = 12, @@ -79,6 +80,11 @@ enum srp_target_state { SRP_TARGET_REMOVED }; +enum srp_request_type { + SRP_REQ_NORMAL, + SRP_REQ_TASK_MGMT, +}; + struct srp_device { struct list_head dev_list; struct ib_device *dev; |