Diffstat (limited to 'drivers/infiniband/hw/ipath')
 drivers/infiniband/hw/ipath/ipath_driver.c    |  20
 drivers/infiniband/hw/ipath/ipath_file_ops.c  |  19
 drivers/infiniband/hw/ipath/ipath_kernel.h    |  10
 drivers/infiniband/hw/ipath/ipath_qp.c        | 237
 drivers/infiniband/hw/ipath/ipath_rc.c        | 285
 drivers/infiniband/hw/ipath/ipath_ruc.c       | 329
 drivers/infiniband/hw/ipath/ipath_uc.c        |  57
 drivers/infiniband/hw/ipath/ipath_ud.c        |  66
 drivers/infiniband/hw/ipath/ipath_user_sdma.h |   2
 drivers/infiniband/hw/ipath/ipath_verbs.c     | 176
 drivers/infiniband/hw/ipath/ipath_verbs.h     |  64
 11 files changed, 718 insertions(+), 547 deletions(-)
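The ipath_driver.c, ipath_file_ops.c, and ipath_kernel.h hunks below replace cached pid_t values with reference-counted struct pid pointers and swap the removed kill_proc() for kill_pid(). For readers unfamiliar with that conversion, here is a minimal sketch of the kernel pattern the patch follows; the example_* names are illustrative, not part of the patch:

	#include <linux/pid.h>
	#include <linux/sched.h>

	struct example_port {
		struct pid *owner;		/* was: pid_t owner */
	};

	static void example_open(struct example_port *port)
	{
		/* Take a reference so the pid cannot be recycled under us. */
		port->owner = get_pid(task_pid(current));
	}

	static void example_signal(struct example_port *port, int sig)
	{
		/* pid_nr() is only for logging; delivery uses the struct pid. */
		pr_info("PID %u in use, sending signal %d\n",
			pid_nr(port->owner), sig);
		kill_pid(port->owner, sig, 1);
	}

	static void example_close(struct example_port *port)
	{
		put_pid(port->owner);		/* drop the reference */
		port->owner = NULL;
	}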
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index ce7b7c34360e..daad09a45910 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1894,7 +1894,7 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
 	 */
 	if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
 		int skip_cancel;
-		u64 *statp = &dd->ipath_sdma_status;
+		unsigned long *statp = &dd->ipath_sdma_status;
 
 		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
 		skip_cancel =
@@ -2616,7 +2616,7 @@ int ipath_reset_device(int unit)
 				ipath_dbg("unit %u port %d is in use "
 					  "(PID %u cmd %s), can't reset\n",
 					  unit, i,
-					  dd->ipath_pd[i]->port_pid,
+					  pid_nr(dd->ipath_pd[i]->port_pid),
 					  dd->ipath_pd[i]->port_comm);
 				ret = -EBUSY;
 				goto bail;
@@ -2654,19 +2654,21 @@ bail:
 static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
 {
 	int i, sub, any = 0;
-	pid_t pid;
+	struct pid *pid;
 
 	if (!dd->ipath_pd)
 		return 0;
+
 	for (i = 1; i < dd->ipath_cfgports; i++) {
-		if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt ||
-		    !dd->ipath_pd[i]->port_pid)
+		if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
 			continue;
 		pid = dd->ipath_pd[i]->port_pid;
+		if (!pid)
+			continue;
+
 		dev_info(&dd->pcidev->dev, "context %d in use "
 			  "(PID %u), sending signal %d\n",
-			  i, pid, sig);
-		kill_proc(pid, sig, 1);
+			  i, pid_nr(pid), sig);
+		kill_pid(pid, sig, 1);
 		any++;
 		for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
 			pid = dd->ipath_pd[i]->port_subpid[sub];
@@ -2674,8 +2676,8 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
 				continue;
 			dev_info(&dd->pcidev->dev, "sub-context "
 				"%d:%d in use (PID %u), sending "
-				"signal %d\n", i, sub, pid, sig);
-			kill_proc(pid, sig, 1);
+				"signal %d\n", i, sub, pid_nr(pid), sig);
+			kill_pid(pid, sig, 1);
 			any++;
 		}
 	}
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 3295177c937e..b472b15637f0 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -555,7 +555,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
 			p = dd->ipath_pageshadow[porttid + tid];
 			dd->ipath_pageshadow[porttid + tid] = NULL;
 			ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
-				   pd->port_pid, tid);
+				   pid_nr(pd->port_pid), tid);
 			dd->ipath_f_put_tid(dd, &tidbase[tid],
 					    RCVHQ_RCV_TYPE_EXPECTED,
 					    dd->ipath_tidinvalid);
@@ -1609,7 +1609,7 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
 			   port);
 		pd->port_cnt = 1;
 		port_fp(fp) = pd;
-		pd->port_pid = current->pid;
+		pd->port_pid = get_pid(task_pid(current));
 		strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
 		ipath_stats.sps_ports++;
 		ret = 0;
@@ -1793,14 +1793,15 @@ static int find_shared_port(struct file *fp,
 			}
 			port_fp(fp) = pd;
 			subport_fp(fp) = pd->port_cnt++;
-			pd->port_subpid[subport_fp(fp)] = current->pid;
+			pd->port_subpid[subport_fp(fp)] =
+				get_pid(task_pid(current));
 			tidcursor_fp(fp) = 0;
 			pd->active_slaves |= 1 << subport_fp(fp);
 			ipath_cdbg(PROC,
 				   "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
 				   current->comm, current->pid,
 				   subport_fp(fp),
-				   pd->port_comm, pd->port_pid,
+				   pd->port_comm, pid_nr(pd->port_pid),
 				   dd->ipath_unit, pd->port_port);
 			ret = 1;
 			goto done;
@@ -2066,7 +2067,8 @@ static int ipath_close(struct inode *in, struct file *fp)
 		 * the slave(s) don't wait for receive data forever.
 		 */
 		pd->active_slaves &= ~(1 << fd->subport);
-		pd->port_subpid[fd->subport] = 0;
+		put_pid(pd->port_subpid[fd->subport]);
+		pd->port_subpid[fd->subport] = NULL;
 		mutex_unlock(&ipath_mutex);
 		goto bail;
 	}
@@ -2074,7 +2076,7 @@ static int ipath_close(struct inode *in, struct file *fp)
 
 	if (pd->port_hdrqfull) {
 		ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
-			   "during run\n", pd->port_comm, pd->port_pid,
+			   "during run\n", pd->port_comm, pid_nr(pd->port_pid),
 			   pd->port_hdrqfull);
 		pd->port_hdrqfull = 0;
 	}
@@ -2134,11 +2136,12 @@ static int ipath_close(struct inode *in, struct file *fp)
 			unlock_expected_tids(pd);
 		ipath_stats.sps_ports--;
 		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
-			   pd->port_comm, pd->port_pid,
+			   pd->port_comm, pid_nr(pd->port_pid),
 			   dd->ipath_unit, port);
 	}
 
-	pd->port_pid = 0;
+	put_pid(pd->port_pid);
+	pd->port_pid = NULL;
 	dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
 	mutex_unlock(&ipath_mutex);
 	ipath_free_pddata(dd, pd); /* after releasing the mutex */
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 02b24a340599..59a8b254b97f 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -159,8 +159,8 @@ struct ipath_portdata {
 	/* saved total number of polled urgent packets for poll edge trigger */
 	u32 port_urgent_poll;
 	/* pid of process using this port */
-	pid_t port_pid;
-	pid_t port_subpid[INFINIPATH_MAX_SUBPORT];
+	struct pid *port_pid;
+	struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
 	/* same size as task_struct .comm[] */
 	char port_comm[16];
 	/* pkeys set by this use of this port */
@@ -483,7 +483,7 @@ struct ipath_devdata {
 
 	/* SendDMA related entries */
 	spinlock_t            ipath_sdma_lock;
-	u64                   ipath_sdma_status;
+	unsigned long         ipath_sdma_status;
 	unsigned long         ipath_sdma_abort_jiffies;
 	unsigned long         ipath_sdma_abort_intr_timeout;
 	unsigned long         ipath_sdma_buf_jiffies;
@@ -822,8 +822,8 @@ struct ipath_devdata {
 #define IPATH_SDMA_DISARMED  1
 #define IPATH_SDMA_DISABLED  2
 #define IPATH_SDMA_LAYERBUF  3
-#define IPATH_SDMA_RUNNING  62
-#define IPATH_SDMA_SHUTDOWN 63
+#define IPATH_SDMA_RUNNING  30
+#define IPATH_SDMA_SHUTDOWN 31
 
 /* bit combinations that correspond to abort states */
 #define IPATH_SDMA_ABORT_NONE 0
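The ipath_sdma_status change above is also why IPATH_SDMA_RUNNING and IPATH_SDMA_SHUTDOWN move from bits 62/63 down to 30/31: the atomic bitops (set_bit(), test_bit(), and friends) operate on unsigned long, which is only 32 bits wide on 32-bit kernels, so every flag must sit below BITS_PER_LONG. A small illustration of the constraint; variable and bit names here are ours:

	#include <linux/bitops.h>

	static unsigned long sdma_status;	/* bitops want unsigned long, not u64 */

	static void sdma_set_running(void)
	{
		/* Bit 62 would index past a 32-bit word; bit 30 is always valid. */
		set_bit(30, &sdma_status);	/* e.g. IPATH_SDMA_RUNNING */
	}

	static bool sdma_running(void)
	{
		return test_bit(30, &sdma_status);
	}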
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index dd5b6e9d57c2..4715911101e4 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
 {
 	struct ipath_qp *q, **qpp;
 	unsigned long flags;
-	int fnd = 0;
 
 	spin_lock_irqsave(&qpt->lock, flags);
 
@@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
 			*qpp = qp->next;
 			qp->next = NULL;
 			atomic_dec(&qp->refcount);
-			fnd = 1;
 			break;
 		}
 	}
 
 	spin_unlock_irqrestore(&qpt->lock, flags);
-
-	if (!fnd)
-		return;
-
-	free_qpn(qpt, qp->ibqp.qp_num);
-
-	wait_event(qp->wait, !atomic_read(&qp->refcount));
 }
 
 /**
- * ipath_free_all_qps - remove all QPs from the table
+ * ipath_free_all_qps - check for QPs still in use
 * @qpt: the QP table to empty
+ *
+ * There should not be any QPs still in use.
+ * Free memory for table.
 */
-void ipath_free_all_qps(struct ipath_qp_table *qpt)
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
 {
 	unsigned long flags;
-	struct ipath_qp *qp, *nqp;
-	u32 n;
+	struct ipath_qp *qp;
+	u32 n, qp_inuse = 0;
 
+	spin_lock_irqsave(&qpt->lock, flags);
 	for (n = 0; n < qpt->max; n++) {
-		spin_lock_irqsave(&qpt->lock, flags);
 		qp = qpt->table[n];
 		qpt->table[n] = NULL;
-		spin_unlock_irqrestore(&qpt->lock, flags);
-
-		while (qp) {
-			nqp = qp->next;
-			free_qpn(qpt, qp->ibqp.qp_num);
-			if (!atomic_dec_and_test(&qp->refcount) ||
-			    !ipath_destroy_qp(&qp->ibqp))
-				ipath_dbg("QP memory leak!\n");
-			qp = nqp;
-		}
+
+		for (; qp; qp = qp->next)
+			qp_inuse++;
 	}
+	spin_unlock_irqrestore(&qpt->lock, flags);
 
-	for (n = 0; n < ARRAY_SIZE(qpt->map); n++) {
+	for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
 		if (qpt->map[n].page)
-			free_page((unsigned long)qpt->map[n].page);
-	}
+			free_page((unsigned long) qpt->map[n].page);
+
+	return qp_inuse;
 }
 
 /**
@@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
 	qp->remote_qpn = 0;
 	qp->qkey = 0;
 	qp->qp_access_flags = 0;
-	qp->s_busy = 0;
+	atomic_set(&qp->s_dma_busy, 0);
 	qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
 	qp->s_hdrwords = 0;
 	qp->s_wqe = NULL;
 	qp->s_pkt_delay = 0;
+	qp->s_draining = 0;
 	qp->s_psn = 0;
 	qp->r_psn = 0;
 	qp->r_msn = 0;
@@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
 	}
 	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 	qp->r_nak_state = 0;
-	qp->r_wrid_valid = 0;
+	qp->r_aflags = 0;
+	qp->r_flags = 0;
 	qp->s_rnr_timeout = 0;
 	qp->s_head = 0;
 	qp->s_tail = 0;
@@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
 	qp->s_last = 0;
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
-	qp->s_wait_credit = 0;
 	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
 	qp->r_head_ack_queue = 0;
 	qp->s_tail_ack_queue = 0;
@@ -370,17 +359,17 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
 		qp->r_rq.wq->head = 0;
 		qp->r_rq.wq->tail = 0;
 	}
-	qp->r_reuse_sge = 0;
 }
 
 /**
- * ipath_error_qp - put a QP into an error state
- * @qp: the QP to put into an error state
+ * ipath_error_qp - put a QP into the error state
+ * @qp: the QP to put into the error state
 * @err: the receive completion error to signal if a RWQE is active
 *
 * Flushes both send and receive work queues.
 * Returns true if last WQE event should be generated.
 * The QP s_lock should be held and interrupts disabled.
+ * If we are already in error state, just return.
 */
 
 int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
@@ -389,8 +378,10 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 	struct ib_wc wc;
 	int ret = 0;
 
-	ipath_dbg("QP%d/%d in error state (%d)\n",
-		  qp->ibqp.qp_num, qp->remote_qpn, err);
+	if (qp->state == IB_QPS_ERR)
+		goto bail;
+
+	qp->state = IB_QPS_ERR;
 
 	spin_lock(&dev->pending_lock);
 	if (!list_empty(&qp->timerwait))
 		list_del_init(&qp->timerwait);
 	if (!list_empty(&qp->piowait))
 		list_del_init(&qp->piowait);
 	spin_unlock(&dev->pending_lock);
 
-	wc.vendor_err = 0;
-	wc.byte_len = 0;
-	wc.imm_data = 0;
+	/* Schedule the sending tasklet to drain the send work queue. */
+	if (qp->s_last != qp->s_head)
+		ipath_schedule_send(qp);
+
+	memset(&wc, 0, sizeof(wc));
 	wc.qp = &qp->ibqp;
-	wc.src_qp = 0;
-	wc.wc_flags = 0;
-	wc.pkey_index = 0;
-	wc.slid = 0;
-	wc.sl = 0;
-	wc.dlid_path_bits = 0;
-	wc.port_num = 0;
-	if (qp->r_wrid_valid) {
-		qp->r_wrid_valid = 0;
+	wc.opcode = IB_WC_RECV;
+
+	if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
 		wc.wr_id = qp->r_wr_id;
-		wc.opcode = IB_WC_RECV;
 		wc.status = err;
 		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
 	}
 	wc.status = IB_WC_WR_FLUSH_ERR;
 
-	while (qp->s_last != qp->s_head) {
-		struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-
-		wc.wr_id = wqe->wr.wr_id;
-		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
-		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
-	}
-	qp->s_cur = qp->s_tail = qp->s_head;
-	qp->s_hdrwords = 0;
-	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-
 	if (qp->r_rq.wq) {
 		struct ipath_rwq *wq;
 		u32 head;
@@ -447,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 		tail = wq->tail;
 		if (tail >= qp->r_rq.size)
 			tail = 0;
-		wc.opcode = IB_WC_RECV;
 		while (tail != head) {
 			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
 			if (++tail >= qp->r_rq.size)
@@ -460,6 +432,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 	} else if (qp->ibqp.event_handler)
 		ret = 1;
 
+bail:
 	return ret;
 }
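Note how ipath_error_qp() now claims the pending receive WQE with test_and_clear_bit() on the new r_aflags word rather than testing and zeroing a plain r_wrid_valid integer. The receive path and the error flush can race, and the atomic bit op guarantees exactly one of them generates the completion. The essential shape, condensed from the hunks above and from ipath_get_rwqe() later in this patch:

	/* Producer (ipath_get_rwqe): publish a consumed receive WQE. */
	set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);

	/* Consumers (packet completion or error flush): exactly one wins. */
	if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
		wc.wr_id = qp->r_wr_id;
		wc.status = err;
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
	}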
@@ -478,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	struct ipath_qp *qp = to_iqp(ibqp);
 	enum ib_qp_state cur_state, new_state;
-	unsigned long flags;
 	int lastwqe = 0;
 	int ret;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
+	spin_lock_irq(&qp->s_lock);
 
 	cur_state = attr_mask & IB_QP_CUR_STATE ?
 		attr->cur_qp_state : qp->state;
@@ -535,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 	switch (new_state) {
 	case IB_QPS_RESET:
+		if (qp->state != IB_QPS_RESET) {
+			qp->state = IB_QPS_RESET;
+			spin_lock(&dev->pending_lock);
+			if (!list_empty(&qp->timerwait))
+				list_del_init(&qp->timerwait);
+			if (!list_empty(&qp->piowait))
+				list_del_init(&qp->piowait);
+			spin_unlock(&dev->pending_lock);
+			qp->s_flags &= ~IPATH_S_ANY_WAIT;
+			spin_unlock_irq(&qp->s_lock);
+			/* Stop the sending tasklet */
+			tasklet_kill(&qp->s_task);
+			wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+			spin_lock_irq(&qp->s_lock);
+		}
 		ipath_reset_qp(qp, ibqp->qp_type);
 		break;
 
+	case IB_QPS_SQD:
+		qp->s_draining = qp->s_last != qp->s_cur;
+		qp->state = new_state;
+		break;
+
+	case IB_QPS_SQE:
+		if (qp->ibqp.qp_type == IB_QPT_RC)
+			goto inval;
+		qp->state = new_state;
+		break;
+
 	case IB_QPS_ERR:
 		lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 		break;
 
 	default:
+		qp->state = new_state;
 		break;
-
 	}
 
 	if (attr_mask & IB_QP_PKEY_INDEX)
@@ -597,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
 		qp->s_max_rd_atomic = attr->max_rd_atomic;
 
-	qp->state = new_state;
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	spin_unlock_irq(&qp->s_lock);
 
 	if (lastwqe) {
 		struct ib_event ev;
@@ -612,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	goto bail;
 
 inval:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	spin_unlock_irq(&qp->s_lock);
 	ret = -EINVAL;
 
 bail:
@@ -643,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	attr->pkey_index = qp->s_pkey_index;
 	attr->alt_pkey_index = 0;
 	attr->en_sqd_async_notify = 0;
-	attr->sq_draining = 0;
+	attr->sq_draining = qp->s_draining;
 	attr->max_rd_atomic = qp->s_max_rd_atomic;
 	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
 	attr->min_rnr_timer = qp->r_min_rnr_timer;
@@ -833,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 		spin_lock_init(&qp->r_rq.lock);
 		atomic_set(&qp->refcount, 0);
 		init_waitqueue_head(&qp->wait);
+		init_waitqueue_head(&qp->wait_dma);
 		tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
 		INIT_LIST_HEAD(&qp->piowait);
 		INIT_LIST_HEAD(&qp->timerwait);
@@ -926,6 +924,7 @@ bail_ip:
 	else
 		vfree(qp->r_rq.wq);
 	ipath_free_qp(&dev->qp_table, qp);
+	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
bail_qp:
 	kfree(qp);
bail_swq:
@@ -947,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 {
 	struct ipath_qp *qp = to_iqp(ibqp);
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
-	unsigned long flags;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-	qp->state = IB_QPS_ERR;
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	spin_lock(&dev->n_qps_lock);
-	dev->n_qps_allocated--;
-	spin_unlock(&dev->n_qps_lock);
+	/* Make sure HW and driver activity is stopped. */
+	spin_lock_irq(&qp->s_lock);
+	if (qp->state != IB_QPS_RESET) {
+		qp->state = IB_QPS_RESET;
+		spin_lock(&dev->pending_lock);
+		if (!list_empty(&qp->timerwait))
+			list_del_init(&qp->timerwait);
+		if (!list_empty(&qp->piowait))
+			list_del_init(&qp->piowait);
+		spin_unlock(&dev->pending_lock);
+		qp->s_flags &= ~IPATH_S_ANY_WAIT;
+		spin_unlock_irq(&qp->s_lock);
+		/* Stop the sending tasklet */
+		tasklet_kill(&qp->s_task);
+		wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+	} else
+		spin_unlock_irq(&qp->s_lock);
 
-	/* Stop the sending tasklet. */
-	tasklet_kill(&qp->s_task);
+	ipath_free_qp(&dev->qp_table, qp);
 
 	if (qp->s_tx) {
 		atomic_dec(&qp->refcount);
 		if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
 			kfree(qp->s_tx->txreq.map_addr);
+		spin_lock_irq(&dev->pending_lock);
+		list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
+		spin_unlock_irq(&dev->pending_lock);
+		qp->s_tx = NULL;
 	}
 
-	/* Make sure the QP isn't on the timeout list. */
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	if (!list_empty(&qp->timerwait))
-		list_del_init(&qp->timerwait);
-	if (!list_empty(&qp->piowait))
-		list_del_init(&qp->piowait);
-	if (qp->s_tx)
-		list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	wait_event(qp->wait, !atomic_read(&qp->refcount));
 
-	/*
-	 * Make sure that the QP is not in the QPN table so receive
-	 * interrupts will discard packets for this QP.  XXX Also remove QP
-	 * from multicast table.
-	 */
-	if (atomic_read(&qp->refcount) != 0)
-		ipath_free_qp(&dev->qp_table, qp);
+	/* all users cleaned up, mark it available */
+	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+	spin_lock(&dev->n_qps_lock);
+	dev->n_qps_allocated--;
+	spin_unlock(&dev->n_qps_lock);
 
 	if (qp->ip)
 		kref_put(&qp->ip->ref, ipath_release_mmap_info);
@@ -1026,48 +1028,6 @@ bail:
 }
 
 /**
- * ipath_sqerror_qp - put a QP's send queue into an error state
- * @qp: QP who's send queue will be put into an error state
- * @wc: the WC responsible for putting the QP in this state
- *
- * Flushes the send work queue.
- * The QP s_lock should be held and interrupts disabled.
- */
-
-void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-
-	ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
-		  qp->ibqp.qp_num, qp->remote_qpn, wc->status);
-
-	spin_lock(&dev->pending_lock);
-	if (!list_empty(&qp->timerwait))
-		list_del_init(&qp->timerwait);
-	if (!list_empty(&qp->piowait))
-		list_del_init(&qp->piowait);
-	spin_unlock(&dev->pending_lock);
-
-	ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
-	if (++qp->s_last >= qp->s_size)
-		qp->s_last = 0;
-
-	wc->status = IB_WC_WR_FLUSH_ERR;
-
-	while (qp->s_last != qp->s_head) {
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		wc->wr_id = wqe->wr.wr_id;
-		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-		ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
-	}
-	qp->s_cur = qp->s_tail = qp->s_head;
-	qp->state = IB_QPS_SQE;
-}
-
-/**
 * ipath_get_credit - flush the send work queue of a QP
 * @qp: the qp who's send work queue to flush
 * @aeth: the Acknowledge Extended Transport Header
@@ -1093,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
 	}
 
 	/* Restart sending if it was blocked due to lack of credits. */
-	if (qp->s_cur != qp->s_head &&
+	if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
+	    qp->s_cur != qp->s_head &&
 	    (qp->s_lsn == (u32) -1 ||
 	     ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
 			 qp->s_lsn + 1) <= 0))
-		tasklet_hi_schedule(&qp->s_task);
+		ipath_schedule_send(qp);
 }
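From this point on, every tasklet_hi_schedule(&qp->s_task) call is funneled through ipath_schedule_send(). The helper itself is defined in ipath_verbs.h, which is not shown in this excerpt; a plausible shape, assuming the IPATH_S_ANY_WAIT mask this patch introduces, would be:

	/* Sketch only: the real definition lives in ipath_verbs.h (not shown). */
	static inline void ipath_schedule_send(struct ipath_qp *qp)
	{
		/* Don't rearm the tasklet while the QP is parked on a wait list. */
		if (!(qp->s_flags & IPATH_S_ANY_WAIT))
			tasklet_hi_schedule(&qp->s_task);
	}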
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 08b11b567614..108df667d2ee 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
 	u32 bth0;
 	u32 bth2;
 
+	/* Don't send an ACK if we aren't supposed to. */
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+		goto bail;
+
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
 
@@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp)
 	    ipath_make_rc_ack(dev, qp, ohdr, pmtu))
 		goto done;
 
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
-	    qp->s_rnr_timeout || qp->s_wait_credit)
-		goto bail;
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+			goto bail;
+		/* We are in the error state, flush the work request. */
+		if (qp->s_last == qp->s_head)
+			goto bail;
+		/* If DMAs are in progress, we can't flush immediately. */
+		if (atomic_read(&qp->s_dma_busy)) {
+			qp->s_flags |= IPATH_S_WAIT_DMA;
+			goto bail;
+		}
+		wqe = get_swqe_ptr(qp, qp->s_last);
+		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+		goto done;
+	}
 
-	/* Limit the number of packets sent without an ACK. */
-	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
-		qp->s_wait_credit = 1;
-		dev->n_rc_stalls++;
+	/* Leave BUSY set until RNR timeout. */
+	if (qp->s_rnr_timeout) {
+		qp->s_flags |= IPATH_S_WAITING;
 		goto bail;
 	}
 
@@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
 	wqe = get_swqe_ptr(qp, qp->s_cur);
 	switch (qp->s_state) {
 	default:
+		if (!(ib_ipath_state_ops[qp->state] &
+		    IPATH_PROCESS_NEXT_SEND_OK))
+			goto bail;
 		/*
 		 * Resend an old request or start a new one.
 		 *
@@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
 		case IB_WR_SEND_WITH_IMM:
 			/* If no credit, return. */
 			if (qp->s_lsn != (u32) -1 &&
-			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
+			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+				qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
 				goto bail;
+			}
 			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
 				wqe->lpsn += (len - 1) / pmtu;
@@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			/* If no credit, return. */
 			if (qp->s_lsn != (u32) -1 &&
-			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
+			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+				qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
 				goto bail;
+			}
 			ohdr->u.rc.reth.vaddr =
 				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
 			ohdr->u.rc.reth.rkey =
@@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp)
 	ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
 	ret = 1;
+	goto unlock;
+
bail:
+	qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
@@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp)
 
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
+	/* Don't try to send ACKs if the link isn't ACTIVE */
 	dd = dev->dd;
+	if (!(dd->ipath_flags & IPATH_LINKACTIVE))
+		goto done;
+
 	piobuf = ipath_getpiobuf(dd, 0, NULL);
 	if (!piobuf) {
 		/*
@@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp)
 	goto done;
 
queue_ack:
-	dev->n_rc_qacks++;
-	qp->s_flags |= IPATH_S_ACK_PENDING;
-	qp->s_nak_state = qp->r_nak_state;
-	qp->s_ack_psn = qp->r_ack_psn;
+	if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
+		dev->n_rc_qacks++;
+		qp->s_flags |= IPATH_S_ACK_PENDING;
+		qp->s_nak_state = qp->r_nak_state;
+		qp->s_ack_psn = qp->r_ack_psn;
+
+		/* Schedule the send tasklet. */
+		ipath_schedule_send(qp);
+	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-	/* Call ipath_do_rc_send() in another thread. */
-	tasklet_hi_schedule(&qp->s_task);
-
done:
 	return;
 }
@@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn)
 	/*
 	 * Set the state to restart in the middle of a request.
 	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
-	 * See ipath_do_rc_send().
+	 * See ipath_make_rc_req().
 	 */
 	switch (opcode) {
 	case IB_WR_SEND:
@@ -771,27 +802,14 @@ done:
 *
 * The QP s_lock should be held and interrupts disabled.
 */
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
+void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
 {
 	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
 	struct ipath_ibdev *dev;
 
 	if (qp->s_retry == 0) {
-		wc->wr_id = wqe->wr.wr_id;
-		wc->status = IB_WC_RETRY_EXC_ERR;
-		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-		wc->vendor_err = 0;
-		wc->byte_len = 0;
-		wc->qp = &qp->ibqp;
-		wc->imm_data = 0;
-		wc->src_qp = qp->remote_qpn;
-		wc->wc_flags = 0;
-		wc->pkey_index = 0;
-		wc->slid = qp->remote_ah_attr.dlid;
-		wc->sl = qp->remote_ah_attr.sl;
-		wc->dlid_path_bits = 0;
-		wc->port_num = 0;
-		ipath_sqerror_qp(qp, wc);
+		ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+		ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 		goto bail;
 	}
 	qp->s_retry--;
@@ -804,6 +822,8 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
 	spin_lock(&dev->pending_lock);
 	if (!list_empty(&qp->timerwait))
 		list_del_init(&qp->timerwait);
+	if (!list_empty(&qp->piowait))
+		list_del_init(&qp->piowait);
 	spin_unlock(&dev->pending_lock);
 
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
@@ -812,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
 		dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
 
 	reset_psn(qp, psn);
-	tasklet_hi_schedule(&qp->s_task);
+	ipath_schedule_send(qp);
 
bail:
 	return;
@@ -820,13 +840,7 @@ bail:
 
 static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
 {
-	if (qp->s_last_psn != psn) {
-		qp->s_last_psn = psn;
-		if (qp->s_wait_credit) {
-			qp->s_wait_credit = 0;
-			tasklet_hi_schedule(&qp->s_task);
-		}
-	}
+	qp->s_last_psn = psn;
 }
 
 /**
@@ -845,6 +859,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 {
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_wc wc;
+	enum ib_wc_status status;
 	struct ipath_swqe *wqe;
 	int ret = 0;
 	u32 ack_psn;
@@ -909,7 +924,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 			 */
 			update_last_psn(qp, wqe->psn - 1);
 			/* Retry this request. */
-			ipath_restart_rc(qp, wqe->psn, &wc);
+			ipath_restart_rc(qp, wqe->psn);
 			/*
 			 * No need to process the ACK/NAK since we are
 			 * restarting an earlier request.
@@ -925,32 +940,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
 			qp->s_num_rd_atomic--;
 			/* Restart sending task if fence is complete */
-			if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
-			    !qp->s_num_rd_atomic) {
-				qp->s_flags &= ~IPATH_S_FENCE_PENDING;
-				tasklet_hi_schedule(&qp->s_task);
-			} else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
-				qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
-				tasklet_hi_schedule(&qp->s_task);
-			}
+			if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+			     !qp->s_num_rd_atomic) ||
+			    qp->s_flags & IPATH_S_RDMAR_PENDING)
+				ipath_schedule_send(qp);
 		}
 		/* Post a send completion queue entry if requested. */
 		if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+			memset(&wc, 0, sizeof wc);
 			wc.wr_id = wqe->wr.wr_id;
 			wc.status = IB_WC_SUCCESS;
 			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-			wc.vendor_err = 0;
 			wc.byte_len = wqe->length;
-			wc.imm_data = 0;
 			wc.qp = &qp->ibqp;
 			wc.src_qp = qp->remote_qpn;
-			wc.wc_flags = 0;
-			wc.pkey_index = 0;
 			wc.slid = qp->remote_ah_attr.dlid;
 			wc.sl = qp->remote_ah_attr.sl;
-			wc.dlid_path_bits = 0;
-			wc.port_num = 0;
 			ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
 		}
 		qp->s_retry = qp->s_retry_cnt;
@@ -971,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 		} else {
 			if (++qp->s_last >= qp->s_size)
 				qp->s_last = 0;
+			if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
+				qp->s_draining = 0;
 			if (qp->s_last == qp->s_tail)
 				break;
 			wqe = get_swqe_ptr(qp, qp->s_last);
@@ -994,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 			 */
 			if (ipath_cmp24(qp->s_psn, psn) <= 0) {
 				reset_psn(qp, psn + 1);
-				tasklet_hi_schedule(&qp->s_task);
+				ipath_schedule_send(qp);
 			}
 		} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
 			qp->s_state = OP(SEND_LAST);
@@ -1012,7 +1020,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 		if (qp->s_last == qp->s_tail)
 			goto bail;
 		if (qp->s_rnr_retry == 0) {
-			wc.status = IB_WC_RNR_RETRY_EXC_ERR;
+			status = IB_WC_RNR_RETRY_EXC_ERR;
 			goto class_b;
 		}
 		if (qp->s_rnr_retry_cnt < 7)
@@ -1033,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 			ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
 					   IPATH_AETH_CREDIT_MASK];
 		ipath_insert_rnr_queue(qp);
+		ipath_schedule_send(qp);
 		goto bail;
 
 	case 3:		/* NAK */
@@ -1050,37 +1059,25 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 			 * RDMA READ response which terminates the RDMA
 			 * READ.
 			 */
-			ipath_restart_rc(qp, psn, &wc);
+			ipath_restart_rc(qp, psn);
 			break;
 
 		case 1:	/* Invalid Request */
-			wc.status = IB_WC_REM_INV_REQ_ERR;
+			status = IB_WC_REM_INV_REQ_ERR;
 			dev->n_other_naks++;
 			goto class_b;
 
 		case 2:	/* Remote Access Error */
-			wc.status = IB_WC_REM_ACCESS_ERR;
+			status = IB_WC_REM_ACCESS_ERR;
 			dev->n_other_naks++;
 			goto class_b;
 
 		case 3:	/* Remote Operation Error */
-			wc.status = IB_WC_REM_OP_ERR;
+			status = IB_WC_REM_OP_ERR;
 			dev->n_other_naks++;
 		class_b:
-			wc.wr_id = wqe->wr.wr_id;
-			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-			wc.vendor_err = 0;
-			wc.byte_len = 0;
-			wc.qp = &qp->ibqp;
-			wc.imm_data = 0;
-			wc.src_qp = qp->remote_qpn;
-			wc.wc_flags = 0;
-			wc.pkey_index = 0;
-			wc.slid = qp->remote_ah_attr.dlid;
-			wc.sl = qp->remote_ah_attr.sl;
-			wc.dlid_path_bits = 0;
-			wc.port_num = 0;
-			ipath_sqerror_qp(qp, &wc);
+			ipath_send_complete(qp, wqe, status);
+			ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			break;
 
 		default:
@@ -1126,8 +1123,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 				     int header_in_data)
 {
 	struct ipath_swqe *wqe;
+	enum ib_wc_status status;
 	unsigned long flags;
-	struct ib_wc wc;
 	int diff;
 	u32 pad;
 	u32 aeth;
@@ -1135,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
+	/* Double check we can process this now that we hold the s_lock. */
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+		goto ack_done;
+
 	/* Ignore invalid responses. */
 	if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
 		goto ack_done;
@@ -1159,6 +1160,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 	if (unlikely(qp->s_last == qp->s_tail))
 		goto ack_done;
 	wqe = get_swqe_ptr(qp, qp->s_last);
+	status = IB_WC_SUCCESS;
 
 	switch (opcode) {
 	case OP(ACKNOWLEDGE):
@@ -1187,6 +1189,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		wqe = get_swqe_ptr(qp, qp->s_last);
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
 			goto ack_op_err;
+		qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
 		/*
 		 * If this is a response to a resent RDMA read, we
 		 * have to be careful to copy the data to the right
@@ -1200,7 +1203,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		/* no AETH, no ACK */
 		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
 			dev->n_rdma_seq++;
-			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+			if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+				goto ack_done;
+			qp->r_flags |= IPATH_R_RDMAR_SEQ;
+			ipath_restart_rc(qp, qp->s_last_psn + 1);
 			goto ack_done;
 		}
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
@@ -1261,7 +1267,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		/* ACKs READ req. */
 		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
 			dev->n_rdma_seq++;
-			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+			if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+				goto ack_done;
+			qp->r_flags |= IPATH_R_RDMAR_SEQ;
+			ipath_restart_rc(qp, qp->s_last_psn + 1);
 			goto ack_done;
 		}
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
@@ -1291,31 +1300,16 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		goto ack_done;
 	}
 
-ack_done:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	goto bail;
-
ack_op_err:
-	wc.status = IB_WC_LOC_QP_OP_ERR;
+	status = IB_WC_LOC_QP_OP_ERR;
 	goto ack_err;
 
ack_len_err:
-	wc.status = IB_WC_LOC_LEN_ERR;
+	status = IB_WC_LOC_LEN_ERR;
ack_err:
-	wc.wr_id = wqe->wr.wr_id;
-	wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-	wc.vendor_err = 0;
-	wc.byte_len = 0;
-	wc.imm_data = 0;
-	wc.qp = &qp->ibqp;
-	wc.src_qp = qp->remote_qpn;
-	wc.wc_flags = 0;
-	wc.pkey_index = 0;
-	wc.slid = qp->remote_ah_attr.dlid;
-	wc.sl = qp->remote_ah_attr.sl;
-	wc.dlid_path_bits = 0;
-	wc.port_num = 0;
-	ipath_sqerror_qp(qp, &wc);
+	ipath_send_complete(qp, wqe, status);
+	ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ack_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
bail:
 	return;
@@ -1384,7 +1378,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
 	psn &= IPATH_PSN_MASK;
 	e = NULL;
 	old_req = 1;
+
 	spin_lock_irqsave(&qp->s_lock, flags);
+	/* Double check we can process this now that we hold the s_lock. */
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+		goto unlock_done;
+
 	for (i = qp->r_head_ack_queue; ; i = prev) {
 		if (i == qp->s_tail_ack_queue)
 			old_req = 0;
@@ -1512,7 +1511,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
 		break;
 	}
 	qp->r_nak_state = 0;
-	tasklet_hi_schedule(&qp->s_task);
+	ipath_schedule_send(qp);
 
unlock_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1523,13 +1522,12 @@ send_ack:
 	return 0;
 }
 
-static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
+void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
 {
 	unsigned long flags;
 	int lastwqe;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	qp->state = IB_QPS_ERR;
 	lastwqe = ipath_error_qp(qp, err);
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
@@ -1545,18 +1543,15 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
 
 static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
 {
-	unsigned long flags;
 	unsigned next;
 
 	next = n + 1;
 	if (next > IPATH_MAX_RDMA_ATOMIC)
 		next = 0;
-	spin_lock_irqsave(&qp->s_lock, flags);
 	if (n == qp->s_tail_ack_queue) {
 		qp->s_tail_ack_queue = next;
 		qp->s_ack_state = OP(ACKNOWLEDGE);
 	}
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
 /**
@@ -1585,6 +1580,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	int diff;
 	struct ib_reth *reth;
 	int header_in_data;
+	unsigned long flags;
 
 	/* Validate the SLID. See Ch. 9.6.1.5 */
 	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
@@ -1643,11 +1639,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		    opcode == OP(SEND_LAST) ||
 		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
 			break;
-	nack_inv:
-		ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
-		qp->r_nak_state = IB_NAK_INVALID_REQUEST;
-		qp->r_ack_psn = qp->r_psn;
-		goto send_ack;
+		goto nack_inv;
 
 	case OP(RDMA_WRITE_FIRST):
 	case OP(RDMA_WRITE_MIDDLE):
@@ -1673,18 +1665,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		break;
 	}
 
-	wc.imm_data = 0;
-	wc.wc_flags = 0;
+	memset(&wc, 0, sizeof wc);
 
 	/* OK, process the packet. */
 	switch (opcode) {
 	case OP(SEND_FIRST):
-		if (!ipath_get_rwqe(qp, 0)) {
-		rnr_nak:
-			qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
-			qp->r_ack_psn = qp->r_psn;
-			goto send_ack;
-		}
+		if (!ipath_get_rwqe(qp, 0))
+			goto rnr_nak;
 		qp->r_rcv_len = 0;
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
@@ -1741,9 +1728,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			goto nack_inv;
 		ipath_copy_sge(&qp->r_sge, data, tlen);
 		qp->r_msn++;
-		if (!qp->r_wrid_valid)
+		if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
 			break;
-		qp->r_wrid_valid = 0;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
 		if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
@@ -1751,14 +1737,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
 		else
 			wc.opcode = IB_WC_RECV;
-		wc.vendor_err = 0;
 		wc.qp = &qp->ibqp;
 		wc.src_qp = qp->remote_qpn;
-		wc.pkey_index = 0;
 		wc.slid = qp->remote_ah_attr.dlid;
 		wc.sl = qp->remote_ah_attr.sl;
-		wc.dlid_path_bits = 0;
-		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
 		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
 			       (ohdr->bth[0] &
@@ -1819,9 +1801,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		next = qp->r_head_ack_queue + 1;
 		if (next > IPATH_MAX_RDMA_ATOMIC)
 			next = 0;
+		spin_lock_irqsave(&qp->s_lock, flags);
+		/* Double check we can process this while holding the s_lock. */
+		if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+			goto unlock;
 		if (unlikely(next == qp->s_tail_ack_queue)) {
 			if (!qp->s_ack_queue[next].sent)
-				goto nack_inv;
+				goto nack_inv_unlck;
 			ipath_update_ack_queue(qp, next);
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
@@ -1842,7 +1828,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
 					   rkey, IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
-				goto nack_acc;
+				goto nack_acc_unlck;
 			/*
 			 * Update the next expected PSN.  We add 1 later
 			 * below, so only add the remainder here.
@@ -1869,13 +1855,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		qp->r_psn++;
 		qp->r_state = opcode;
 		qp->r_nak_state = 0;
-		barrier();
 		qp->r_head_ack_queue = next;
 
-		/* Call ipath_do_rc_send() in another thread. */
-		tasklet_hi_schedule(&qp->s_task);
+		/* Schedule the send tasklet. */
+		ipath_schedule_send(qp);
 
-		goto done;
+		goto unlock;
 	}
 
 	case OP(COMPARE_SWAP):
@@ -1894,9 +1879,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		next = qp->r_head_ack_queue + 1;
 		if (next > IPATH_MAX_RDMA_ATOMIC)
 			next = 0;
+		spin_lock_irqsave(&qp->s_lock, flags);
+		/* Double check we can process this while holding the s_lock. */
+		if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+			goto unlock;
 		if (unlikely(next == qp->s_tail_ack_queue)) {
 			if (!qp->s_ack_queue[next].sent)
-				goto nack_inv;
+				goto nack_inv_unlck;
 			ipath_update_ack_queue(qp, next);
 		}
 		if (!header_in_data)
@@ -1906,13 +1895,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
 			be32_to_cpu(ateth->vaddr[1]);
 		if (unlikely(vaddr & (sizeof(u64) - 1)))
-			goto nack_inv;
+			goto nack_inv_unlck;
 		rkey = be32_to_cpu(ateth->rkey);
 		/* Check rkey & NAK */
 		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
 					    sizeof(u64), vaddr, rkey,
 					    IB_ACCESS_REMOTE_ATOMIC)))
-			goto nack_acc;
+			goto nack_acc_unlck;
 		/* Perform atomic OP and save result. */
 		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
 		sdata = be64_to_cpu(ateth->swap_data);
@@ -1929,13 +1918,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		qp->r_psn++;
 		qp->r_state = opcode;
 		qp->r_nak_state = 0;
-		barrier();
 		qp->r_head_ack_queue = next;
 
-		/* Call ipath_do_rc_send() in another thread. */
-		tasklet_hi_schedule(&qp->s_task);
+		/* Schedule the send tasklet. */
+		ipath_schedule_send(qp);
 
-		goto done;
+		goto unlock;
 	}
 
 	default:
@@ -1951,14 +1939,31 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		goto send_ack;
 	goto done;
 
+rnr_nak:
+	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+	qp->r_ack_psn = qp->r_psn;
+	goto send_ack;
+
+nack_inv_unlck:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_inv:
+	ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+	qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+	qp->r_ack_psn = qp->r_psn;
+	goto send_ack;
+
+nack_acc_unlck:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
-	ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
+	ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
 	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
 	qp->r_ack_psn = qp->r_psn;
-
send_ack:
 	send_rc_ack(qp);
+	goto done;
+
+unlock:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
done:
 	return;
 }
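The new preamble in ipath_make_rc_req() above is the core of the error-state rework, and it reappears almost verbatim in the UC and UD request builders below: a QP that has left the send-OK states flushes its own send queue from the send tasklet, but only after any in-flight send DMA drains. Distilled from the hunks above:

	/* Condensed from ipath_make_rc_req(); s_lock is held here. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
			goto bail;			/* nothing to flush */
		if (qp->s_last == qp->s_head)
			goto bail;			/* queue already empty */
		if (atomic_read(&qp->s_dma_busy)) {
			qp->s_flags |= IPATH_S_WAIT_DMA;	/* finish after DMA drains */
			goto bail;
		}
		wqe = get_swqe_ptr(qp, qp->s_last);
		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
		goto done;				/* loop to flush the next WQE */
	}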
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 9e3fe61cbd08..a4b5521567fe 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
@@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = {
 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
 * @qp: the QP
 *
+ * Called with the QP s_lock held and interrupts disabled.
 * XXX Use a simple list for now.  We might need a priority
 * queue if we have lots of QPs waiting for RNR timeouts
 * but that should be rare.
@@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = {
 void ipath_insert_rnr_queue(struct ipath_qp *qp)
 {
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	unsigned long flags;
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	/* We already did a spin_lock_irqsave(), so just use spin_lock */
+	spin_lock(&dev->pending_lock);
 	if (list_empty(&dev->rnrwait))
 		list_add(&qp->timerwait, &dev->rnrwait);
 	else {
@@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
 			nqp->s_rnr_timeout -= qp->s_rnr_timeout;
 		list_add(&qp->timerwait, l);
 	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	spin_unlock(&dev->pending_lock);
 }
 
 /**
@@ -140,20 +141,11 @@ int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
 	goto bail;
 
bad_lkey:
+	memset(&wc, 0, sizeof(wc));
 	wc.wr_id = wqe->wr_id;
 	wc.status = IB_WC_LOC_PROT_ERR;
 	wc.opcode = IB_WC_RECV;
-	wc.vendor_err = 0;
-	wc.byte_len = 0;
-	wc.imm_data = 0;
 	wc.qp = &qp->ibqp;
-	wc.src_qp = 0;
-	wc.wc_flags = 0;
-	wc.pkey_index = 0;
-	wc.slid = 0;
-	wc.sl = 0;
-	wc.dlid_path_bits = 0;
-	wc.port_num = 0;
 	/* Signal solicited completion event. */
 	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
 	ret = 0;
@@ -194,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 	}
 
 	spin_lock_irqsave(&rq->lock, flags);
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+		ret = 0;
+		goto unlock;
+	}
+
 	wq = rq->wq;
 	tail = wq->tail;
 	/* Validate tail before using it since it is user writable. */
@@ -201,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 		tail = 0;
 	do {
 		if (unlikely(tail == wq->head)) {
-			spin_unlock_irqrestore(&rq->lock, flags);
 			ret = 0;
-			goto bail;
+			goto unlock;
 		}
 		/* Make sure entry is read after head index is read. */
 		smp_rmb();
@@ -216,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 	wq->tail = tail;
 
 	ret = 1;
-	qp->r_wrid_valid = 1;
+	set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
 	if (handler) {
 		u32 n;
 
@@ -243,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 			goto bail;
 		}
 	}
+unlock:
 	spin_unlock_irqrestore(&rq->lock, flags);
-
bail:
 	return ret;
 }
@@ -270,38 +266,63 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
 	struct ib_wc wc;
 	u64 sdata;
 	atomic64_t *maddr;
+	enum ib_wc_status send_status;
 
+	/*
+	 * Note that we check the responder QP state after
+	 * checking the requester's state.
+	 */
 	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
-	if (!qp) {
-		dev->n_pkt_drops++;
-		return;
-	}
 
-again:
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
-	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
-	    sqp->s_rnr_timeout) {
-		spin_unlock_irqrestore(&sqp->s_lock, flags);
-		goto done;
-	}
+	/* Return if we are already busy processing a work request. */
+	if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+	    !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+		goto unlock;
 
-	/* Get the next send request. */
-	if (sqp->s_last == sqp->s_head) {
-		/* Send work queue is empty. */
-		spin_unlock_irqrestore(&sqp->s_lock, flags);
-		goto done;
+	sqp->s_flags |= IPATH_S_BUSY;
+
+again:
+	if (sqp->s_last == sqp->s_head)
+		goto clr_busy;
+	wqe = get_swqe_ptr(sqp, sqp->s_last);
+
+	/* Return if it is not OK to start a new work request. */
+	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
+			goto clr_busy;
+		/* We are in the error state, flush the work request. */
+		send_status = IB_WC_WR_FLUSH_ERR;
+		goto flush_send;
 	}
 
 	/*
 	 * We can rely on the entry not changing without the s_lock
 	 * being held until we update s_last.
+	 * We increment s_cur to indicate s_last is in progress.
 	 */
-	wqe = get_swqe_ptr(sqp, sqp->s_last);
+	if (sqp->s_last == sqp->s_cur) {
+		if (++sqp->s_cur >= sqp->s_size)
+			sqp->s_cur = 0;
+	}
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 
-	wc.wc_flags = 0;
-	wc.imm_data = 0;
+	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+		dev->n_pkt_drops++;
+		/*
+		 * For RC, the requester would timeout and retry so
+		 * shortcut the timeouts and just signal too many retries.
+		 */
+		if (sqp->ibqp.qp_type == IB_QPT_RC)
+			send_status = IB_WC_RETRY_EXC_ERR;
+		else
+			send_status = IB_WC_SUCCESS;
+		goto serr;
+	}
+
+	memset(&wc, 0, sizeof wc);
+	send_status = IB_WC_SUCCESS;
 
 	sqp->s_sge.sge = wqe->sg_list[0];
 	sqp->s_sge.sg_list = wqe->sg_list + 1;
@@ -313,75 +334,33 @@ again:
 		wc.imm_data = wqe->wr.ex.imm_data;
 		/* FALLTHROUGH */
 	case IB_WR_SEND:
-		if (!ipath_get_rwqe(qp, 0)) {
-		rnr_nak:
-			/* Handle RNR NAK */
-			if (qp->ibqp.qp_type == IB_QPT_UC)
-				goto send_comp;
-			if (sqp->s_rnr_retry == 0) {
-				wc.status = IB_WC_RNR_RETRY_EXC_ERR;
-				goto err;
-			}
-			if (sqp->s_rnr_retry_cnt < 7)
-				sqp->s_rnr_retry--;
-			dev->n_rnr_naks++;
-			sqp->s_rnr_timeout =
-				ib_ipath_rnr_table[qp->r_min_rnr_timer];
-			ipath_insert_rnr_queue(sqp);
-			goto done;
-		}
+		if (!ipath_get_rwqe(qp, 0))
+			goto rnr_nak;
 		break;
 
 	case IB_WR_RDMA_WRITE_WITH_IMM:
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_WRITE))) {
-			wc.status = IB_WC_REM_INV_REQ_ERR;
-			goto err;
-		}
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+			goto inv_err;
 		wc.wc_flags = IB_WC_WITH_IMM;
 		wc.imm_data = wqe->wr.ex.imm_data;
 		if (!ipath_get_rwqe(qp, 1))
 			goto rnr_nak;
 		/* FALLTHROUGH */
 	case IB_WR_RDMA_WRITE:
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_WRITE))) {
-			wc.status = IB_WC_REM_INV_REQ_ERR;
-			goto err;
-		}
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+			goto inv_err;
 		if (wqe->length == 0)
 			break;
 		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
 					    wqe->wr.wr.rdma.remote_addr,
 					    wqe->wr.wr.rdma.rkey,
-					    IB_ACCESS_REMOTE_WRITE))) {
-		acc_err:
-			wc.status = IB_WC_REM_ACCESS_ERR;
-		err:
-			wc.wr_id = wqe->wr.wr_id;
-			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-			wc.vendor_err = 0;
-			wc.byte_len = 0;
-			wc.qp = &sqp->ibqp;
-			wc.src_qp = sqp->remote_qpn;
-			wc.pkey_index = 0;
-			wc.slid = sqp->remote_ah_attr.dlid;
-			wc.sl = sqp->remote_ah_attr.sl;
-			wc.dlid_path_bits = 0;
-			wc.port_num = 0;
-			spin_lock_irqsave(&sqp->s_lock, flags);
-			ipath_sqerror_qp(sqp, &wc);
-			spin_unlock_irqrestore(&sqp->s_lock, flags);
-			goto done;
-		}
+					    IB_ACCESS_REMOTE_WRITE)))
+			goto acc_err;
 		break;
 
 	case IB_WR_RDMA_READ:
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_READ))) {
-			wc.status = IB_WC_REM_INV_REQ_ERR;
-			goto err;
-		}
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+			goto inv_err;
 		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
 					    wqe->wr.wr.rdma.remote_addr,
 					    wqe->wr.wr.rdma.rkey,
@@ -394,11 +373,8 @@ again:
 
 	case IB_WR_ATOMIC_CMP_AND_SWP:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_ATOMIC))) {
-			wc.status = IB_WC_REM_INV_REQ_ERR;
-			goto err;
-		}
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+			goto inv_err;
 		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
 					    wqe->wr.wr.atomic.remote_addr,
 					    wqe->wr.wr.atomic.rkey,
@@ -415,7 +391,8 @@ again:
 		goto send_comp;
 
 	default:
-		goto done;
+		send_status = IB_WC_LOC_QP_OP_ERR;
+		goto serr;
 	}
 
 	sge = &sqp->s_sge.sge;
@@ -448,8 +425,7 @@ again:
 		sqp->s_len -= len;
 	}
 
-	if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
-	    wqe->wr.opcode == IB_WR_RDMA_READ)
+	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
 		goto send_comp;
 
 	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -458,33 +434,89 @@ again:
 		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
 	else
 		wc.opcode = IB_WC_RECV;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
-	wc.vendor_err = 0;
 	wc.byte_len = wqe->length;
 	wc.qp = &qp->ibqp;
 	wc.src_qp = qp->remote_qpn;
-	wc.pkey_index = 0;
 	wc.slid = qp->remote_ah_attr.dlid;
 	wc.sl = qp->remote_ah_attr.sl;
-	wc.dlid_path_bits = 0;
 	wc.port_num = 1;
 	/* Signal completion event if the solicited bit is set. */
 	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
 		       wqe->wr.send_flags & IB_SEND_SOLICITED);
 
send_comp:
+	spin_lock_irqsave(&sqp->s_lock, flags);
+flush_send:
 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
-	ipath_send_complete(sqp, wqe, IB_WC_SUCCESS);
+	ipath_send_complete(sqp, wqe, send_status);
 	goto again;
 
+rnr_nak:
+	/* Handle RNR NAK */
+	if (qp->ibqp.qp_type == IB_QPT_UC)
+		goto send_comp;
+	/*
+	 * Note: we don't need the s_lock held since the BUSY flag
+	 * makes this single threaded.
+	 */
+	if (sqp->s_rnr_retry == 0) {
+		send_status = IB_WC_RNR_RETRY_EXC_ERR;
+		goto serr;
+	}
+	if (sqp->s_rnr_retry_cnt < 7)
+		sqp->s_rnr_retry--;
+	spin_lock_irqsave(&sqp->s_lock, flags);
+	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
+		goto clr_busy;
+	sqp->s_flags |= IPATH_S_WAITING;
+	dev->n_rnr_naks++;
+	sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
+	ipath_insert_rnr_queue(sqp);
+	goto clr_busy;
+
+inv_err:
+	send_status = IB_WC_REM_INV_REQ_ERR;
+	wc.status = IB_WC_LOC_QP_OP_ERR;
+	goto err;
+
+acc_err:
+	send_status = IB_WC_REM_ACCESS_ERR;
+	wc.status = IB_WC_LOC_PROT_ERR;
+err:
+	/* responder goes to error state */
+	ipath_rc_error(qp, wc.status);
+
+serr:
+	spin_lock_irqsave(&sqp->s_lock, flags);
+	ipath_send_complete(sqp, wqe, send_status);
+	if (sqp->ibqp.qp_type == IB_QPT_RC) {
+		int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+		sqp->s_flags &= ~IPATH_S_BUSY;
+		spin_unlock_irqrestore(&sqp->s_lock, flags);
+		if (lastwqe) {
+			struct ib_event ev;
+
+			ev.device = sqp->ibqp.device;
+			ev.element.qp = &sqp->ibqp;
+			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+		}
+		goto done;
+	}
+clr_busy:
+	sqp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+	spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
-	if (atomic_dec_and_test(&qp->refcount))
+	if (qp && atomic_dec_and_test(&qp->refcount))
 		wake_up(&qp->wait);
 }
 
 static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
 {
 	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
-		qp->ibqp.qp_type == IB_QPT_SMI) {
+	    qp->ibqp.qp_type == IB_QPT_SMI) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -502,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
 * @dev: the device we ran out of buffers on
 *
 * Called when we run out of PIO buffers.
+ * If we are now in the error state, return zero to flush the
+ * send work request.
 */
-static void ipath_no_bufs_available(struct ipath_qp *qp,
+static int ipath_no_bufs_available(struct ipath_qp *qp,
 				    struct ipath_ibdev *dev)
 {
 	unsigned long flags;
+	int ret = 1;
 
 	/*
 	 * Note that as soon as want_buffer() is called and
 	 * possibly before it returns, ipath_ib_piobufavail()
-	 * could be called.  If we are still in the tasklet function,
-	 * tasklet_hi_schedule() will not call us until the next time
-	 * tasklet_hi_schedule() is called.
-	 * We leave the busy flag set so that another post send doesn't
-	 * try to put the same QP on the piowait list again.
+	 * could be called.  Therefore, put QP on the piowait list before
+	 * enabling the PIO avail interrupt.
 	 */
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	list_add_tail(&qp->piowait, &dev->piowait);
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-	want_buffer(dev->dd, qp);
-	dev->n_piowait++;
+	spin_lock_irqsave(&qp->s_lock, flags);
+	if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
+		dev->n_piowait++;
+		qp->s_flags |= IPATH_S_WAITING;
+		qp->s_flags &= ~IPATH_S_BUSY;
+		spin_lock(&dev->pending_lock);
+		if (list_empty(&qp->piowait))
+			list_add_tail(&qp->piowait, &dev->piowait);
+		spin_unlock(&dev->pending_lock);
+	} else
+		ret = 0;
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	if (ret)
+		want_buffer(dev->dd, qp);
+	return ret;
 }
 
 /**
@@ -597,15 +639,13 @@ void ipath_do_send(unsigned long data)
 	struct ipath_qp *qp = (struct ipath_qp *)data;
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	int (*make_req)(struct ipath_qp *qp);
-
-	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
-		goto bail;
+	unsigned long flags;
 
 	if ((qp->ibqp.qp_type == IB_QPT_RC ||
 	     qp->ibqp.qp_type == IB_QPT_UC) &&
 	    qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
 		ipath_ruc_loopback(qp);
-		goto clear;
+		goto bail;
 	}
 
 	if (qp->ibqp.qp_type == IB_QPT_RC)
@@ -615,6 +655,19 @@ void ipath_do_send(unsigned long data)
 	else
 	       make_req = ipath_make_ud_req;
 
+	spin_lock_irqsave(&qp->s_lock, flags);
+
+	/* Return if we are already busy processing a work request. */
+	if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+	    !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+		goto bail;
+	}
+
+	qp->s_flags |= IPATH_S_BUSY;
+
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+
again:
 	/* Check for a constructed packet to be sent. */
 	if (qp->s_hdrwords != 0) {
@@ -624,8 +677,8 @@ again:
 		 */
 		if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
 				     qp->s_cur_sge, qp->s_cur_size)) {
-			ipath_no_bufs_available(qp, dev);
-			goto bail;
+			if (ipath_no_bufs_available(qp, dev))
+				goto bail;
 		}
 		dev->n_unicast_xmit++;
 		/* Record that we sent the packet and s_hdr is empty. */
@@ -634,16 +687,20 @@ again:
 
 	if (make_req(qp))
 		goto again;
-clear:
-	clear_bit(IPATH_S_BUSY, &qp->s_busy);
+
bail:;
 }
 
+/*
+ * This should be called with s_lock held.
+ */
 void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
 			 enum ib_wc_status status)
 {
-	unsigned long flags;
-	u32 last;
+	u32 old_last, last;
+
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+		return;
 
 	/* See ch. 11.2.4.1 and 10.7.3.1 */
 	if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
@@ -651,27 +708,25 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
 	    status != IB_WC_SUCCESS) {
 		struct ib_wc wc;
 
+		memset(&wc, 0, sizeof wc);
 		wc.wr_id = wqe->wr.wr_id;
 		wc.status = status;
 		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-		wc.vendor_err = 0;
-		wc.byte_len = wqe->length;
-		wc.imm_data = 0;
 		wc.qp = &qp->ibqp;
-		wc.src_qp = 0;
-		wc.wc_flags = 0;
-		wc.pkey_index = 0;
-		wc.slid = 0;
-		wc.sl = 0;
-		wc.dlid_path_bits = 0;
-		wc.port_num = 0;
-		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+		if (status == IB_WC_SUCCESS)
+			wc.byte_len = wqe->length;
+		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+			       status != IB_WC_SUCCESS);
 	}
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-	last = qp->s_last;
+	old_last = last = qp->s_last;
 	if (++last >= qp->s_size)
 		last = 0;
 	qp->s_last = last;
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	if (qp->s_cur == old_last)
+		qp->s_cur = last;
+	if (qp->s_tail == old_last)
+		qp->s_tail = last;
+	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+		qp->s_draining = 0;
 }
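ipath_send_complete() above changes its locking contract: it no longer takes s_lock itself, so every caller must already hold it, and it now also advances s_cur and s_tail past flushed entries and clears s_draining once an SQD send queue empties. Callers in this patch therefore follow this pattern, abbreviated here:

	/* Abbreviated caller pattern; the s_lock must be held across the call. */
	spin_lock_irqsave(&qp->s_lock, flags);
	/* ... decide that wqe is complete, or must be flushed with an error ... */
	ipath_send_complete(qp, wqe, status);	/* may advance s_cur/s_tail */
	spin_unlock_irqrestore(&qp->s_lock, flags);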
*/ @@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)  			break;  		default: -			goto done; +			goto bail;  		}  		break; @@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp)  	ipath_make_ruc_header(to_idev(qp->ibqp.device),  			      qp, ohdr, bth0 | (qp->s_state << 24),  			      qp->s_next_psn++ & IPATH_PSN_MASK); +done:  	ret = 1; +	goto unlock; -done: +bail: +	qp->s_flags &= ~IPATH_S_BUSY; +unlock: +	spin_unlock_irqrestore(&qp->s_lock, flags);  	return ret;  } @@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  	 */  	opcode = be32_to_cpu(ohdr->bth[0]) >> 24; -	wc.imm_data = 0; -	wc.wc_flags = 0; +	memset(&wc, 0, sizeof wc);  	/* Compare the PSN verses the expected PSN. */  	if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { @@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  	case OP(SEND_ONLY):  	case OP(SEND_ONLY_WITH_IMMEDIATE):  	send_first: -		if (qp->r_reuse_sge) { -			qp->r_reuse_sge = 0; +		if (qp->r_flags & IPATH_R_REUSE_SGE) { +			qp->r_flags &= ~IPATH_R_REUSE_SGE;  			qp->r_sge = qp->s_rdma_read_sge;  		} else if (!ipath_get_rwqe(qp, 0)) {  			dev->n_pkt_drops++; @@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  	case OP(SEND_MIDDLE):  		/* Check for invalid length PMTU or posted rwqe len. */  		if (unlikely(tlen != (hdrsize + pmtu + 4))) { -			qp->r_reuse_sge = 1; +			qp->r_flags |= IPATH_R_REUSE_SGE;  			dev->n_pkt_drops++;  			goto done;  		}  		qp->r_rcv_len += pmtu;  		if (unlikely(qp->r_rcv_len > qp->r_len)) { -			qp->r_reuse_sge = 1; +			qp->r_flags |= IPATH_R_REUSE_SGE;  			dev->n_pkt_drops++;  			goto done;  		} @@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  		/* Check for invalid length. */  		/* XXX LAST len should be >= 1 */  		if (unlikely(tlen < (hdrsize + pad + 4))) { -			qp->r_reuse_sge = 1; +			qp->r_flags |= IPATH_R_REUSE_SGE;  			dev->n_pkt_drops++;  			goto done;  		} @@ -380,7 +403,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  		tlen -= (hdrsize + pad + 4);  		wc.byte_len = tlen + qp->r_rcv_len;  		if (unlikely(wc.byte_len > qp->r_len)) { -			qp->r_reuse_sge = 1; +			qp->r_flags |= IPATH_R_REUSE_SGE;  			dev->n_pkt_drops++;  			goto done;  		} @@ -390,14 +413,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  		wc.wr_id = qp->r_wr_id;  		wc.status = IB_WC_SUCCESS;  		wc.opcode = IB_WC_RECV; -		wc.vendor_err = 0;  		wc.qp = &qp->ibqp;  		wc.src_qp = qp->remote_qpn; -		wc.pkey_index = 0;  		wc.slid = qp->remote_ah_attr.dlid;  		wc.sl = qp->remote_ah_attr.sl; -		wc.dlid_path_bits = 0; -		wc.port_num = 0;  		/* Signal completion event if the solicited bit is set. 
*/  		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,  			       (ohdr->bth[0] & @@ -488,8 +507,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  			dev->n_pkt_drops++;  			goto done;  		} -		if (qp->r_reuse_sge) -			qp->r_reuse_sge = 0; +		if (qp->r_flags & IPATH_R_REUSE_SGE) +			qp->r_flags &= ~IPATH_R_REUSE_SGE;  		else if (!ipath_get_rwqe(qp, 1)) {  			dev->n_pkt_drops++;  			goto done; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 8b6a261c89e3..77ca8ca74e78 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)  	u32 length;  	qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); -	if (!qp) { +	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {  		dev->n_pkt_drops++; -		goto send_comp; +		goto done;  	}  	rsge.sg_list = NULL; @@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)  	 * present on the wire.  	 */  	length = swqe->length; +	memset(&wc, 0, sizeof wc);  	wc.byte_len = length + sizeof(struct ib_grh);  	if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {  		wc.wc_flags = IB_WC_WITH_IMM;  		wc.imm_data = swqe->wr.ex.imm_data; -	} else { -		wc.wc_flags = 0; -		wc.imm_data = 0;  	}  	/* @@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)  	}  	wc.status = IB_WC_SUCCESS;  	wc.opcode = IB_WC_RECV; -	wc.vendor_err = 0;  	wc.qp = &qp->ibqp;  	wc.src_qp = sqp->ibqp.qp_num;  	/* XXX do we know which pkey matched? Only needed for GSI. */ @@ -248,8 +245,7 @@ drop:  	kfree(rsge.sg_list);  	if (atomic_dec_and_test(&qp->refcount))  		wake_up(&qp->wait); -send_comp: -	ipath_send_complete(sqp, swqe, IB_WC_SUCCESS); +done:;  }  /** @@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)  	struct ipath_other_headers *ohdr;  	struct ib_ah_attr *ah_attr;  	struct ipath_swqe *wqe; +	unsigned long flags;  	u32 nwords;  	u32 extra_bytes;  	u32 bth0; @@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp)  	u16 lid;  	int ret = 0; -	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))) -		goto bail; +	spin_lock_irqsave(&qp->s_lock, flags); + +	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { +		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) +			goto bail; +		/* We are in the error state, flush the work request. */ +		if (qp->s_last == qp->s_head) +			goto bail; +		/* If DMAs are in progress, we can't flush immediately. */ +		if (atomic_read(&qp->s_dma_busy)) { +			qp->s_flags |= IPATH_S_WAIT_DMA; +			goto bail; +		} +		wqe = get_swqe_ptr(qp, qp->s_last); +		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); +		goto done; +	}  	if (qp->s_cur == qp->s_head)  		goto bail;  	wqe = get_swqe_ptr(qp, qp->s_cur); +	if (++qp->s_cur >= qp->s_size) +		qp->s_cur = 0;  	/* Construct the header. */  	ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; @@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp)  			dev->n_unicast_xmit++;  	} else {  		dev->n_unicast_xmit++; -		lid = ah_attr->dlid & -			~((1 << dev->dd->ipath_lmc) - 1); +		lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);  		if (unlikely(lid == dev->dd->ipath_lid)) { +			/* +			 * If DMAs are in progress, we can't generate +			 * a completion for the loopback packet since +			 * it would be out of order. 
+			 * XXX Instead of waiting, we could queue a +			 * zero length descriptor so we get a callback. +			 */ +			if (atomic_read(&qp->s_dma_busy)) { +				qp->s_flags |= IPATH_S_WAIT_DMA; +				goto bail; +			} +			spin_unlock_irqrestore(&qp->s_lock, flags);  			ipath_ud_loopback(qp, wqe); +			spin_lock_irqsave(&qp->s_lock, flags); +			ipath_send_complete(qp, wqe, IB_WC_SUCCESS);  			goto done;  		}  	} @@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp)  	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);  done: -	if (++qp->s_cur >= qp->s_size) -		qp->s_cur = 0;  	ret = 1; +	goto unlock;  bail: +	qp->s_flags &= ~IPATH_S_BUSY; +unlock: +	spin_unlock_irqrestore(&qp->s_lock, flags);  	return ret;  } @@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  	/*  	 * Get the next work request entry to find where to put the data.  	 */ -	if (qp->r_reuse_sge) -		qp->r_reuse_sge = 0; +	if (qp->r_flags & IPATH_R_REUSE_SGE) +		qp->r_flags &= ~IPATH_R_REUSE_SGE;  	else if (!ipath_get_rwqe(qp, 0)) {  		/*  		 * Count VL15 packets dropped due to no receive buffer. @@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  	}  	/* Silently drop packets which are too big. */  	if (wc.byte_len > qp->r_len) { -		qp->r_reuse_sge = 1; +		qp->r_flags |= IPATH_R_REUSE_SGE;  		dev->n_pkt_drops++;  		goto bail;  	} @@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  		ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));  	ipath_copy_sge(&qp->r_sge, data,  		       wc.byte_len - sizeof(struct ib_grh)); -	qp->r_wrid_valid = 0; +	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) +		goto bail;  	wc.wr_id = qp->r_wr_id;  	wc.status = IB_WC_SUCCESS;  	wc.opcode = IB_WC_RECV; diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h index e70946c1428c..fc76316c4a58 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.h +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h @@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,  int ipath_user_sdma_make_progress(struct ipath_devdata *dd,  				  struct ipath_user_sdma_queue *pq); -int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq, -			     u32 counter);  void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,  				 struct ipath_user_sdma_queue *pq); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 5015cd2e57bd..e0ec540042bf 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -111,16 +111,24 @@ static unsigned int ib_ipath_disable_sma;  module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);  MODULE_PARM_DESC(disable_sma, "Disable the SMA"); +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; ipath_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. 
+ */  const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {  	[IB_QPS_RESET] = 0,  	[IB_QPS_INIT] = IPATH_POST_RECV_OK,  	[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,  	[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | -	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, +	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK | +	    IPATH_PROCESS_NEXT_SEND_OK,  	[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | -	    IPATH_POST_SEND_OK, -	[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, -	[IB_QPS_ERR] = 0, +	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, +	[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | +	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, +	[IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV | +	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,  };  struct ipath_ucontext { @@ -230,18 +238,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)  	}  } -static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr) -{ -	struct ib_wc wc; - -	memset(&wc, 0, sizeof(wc)); -	wc.wr_id = wr->wr_id; -	wc.status = IB_WC_WR_FLUSH_ERR; -	wc.opcode = ib_ipath_wc_opcode[wr->opcode]; -	wc.qp = &qp->ibqp; -	ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); -} -  /*   * Count the number of DMA descriptors needed to send length bytes of data.   * Don't modify the ipath_sge_state to get the count. @@ -347,14 +343,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)  	spin_lock_irqsave(&qp->s_lock, flags);  	/* Check that state is OK to post send. */ -	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) { -		if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR) -			goto bail_inval; -		/* C10-96 says generate a flushed completion entry. */ -		ipath_flush_wqe(qp, wr); -		ret = 0; -		goto bail; -	} +	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) +		goto bail_inval;  	/* IB spec says that num_sge == 0 is OK. */  	if (wr->num_sge > qp->s_max_sge) @@ -677,6 +667,7 @@ bail:;  static void ipath_ib_timer(struct ipath_ibdev *dev)  {  	struct ipath_qp *resend = NULL; +	struct ipath_qp *rnr = NULL;  	struct list_head *last;  	struct ipath_qp *qp;  	unsigned long flags; @@ -703,7 +694,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)  		if (--qp->s_rnr_timeout == 0) {  			do {  				list_del_init(&qp->timerwait); -				tasklet_hi_schedule(&qp->s_task); +				qp->timer_next = rnr; +				rnr = qp; +				atomic_inc(&qp->refcount);  				if (list_empty(last))  					break;  				qp = list_entry(last->next, struct ipath_qp, @@ -743,13 +736,15 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)  	spin_unlock_irqrestore(&dev->pending_lock, flags);  	/* XXX What if timer fires again while this is running? 
*/ -	for (qp = resend; qp != NULL; qp = qp->timer_next) { -		struct ib_wc wc; +	while (resend != NULL) { +		qp = resend; +		resend = qp->timer_next;  		spin_lock_irqsave(&qp->s_lock, flags); -		if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { +		if (qp->s_last != qp->s_tail && +		    ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {  			dev->n_timeouts++; -			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); +			ipath_restart_rc(qp, qp->s_last_psn + 1);  		}  		spin_unlock_irqrestore(&qp->s_lock, flags); @@ -757,6 +752,19 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)  		if (atomic_dec_and_test(&qp->refcount))  			wake_up(&qp->wait);  	} +	while (rnr != NULL) { +		qp = rnr; +		rnr = qp->timer_next; + +		spin_lock_irqsave(&qp->s_lock, flags); +		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) +			ipath_schedule_send(qp); +		spin_unlock_irqrestore(&qp->s_lock, flags); + +		/* Notify ipath_destroy_qp() if it is waiting. */ +		if (atomic_dec_and_test(&qp->refcount)) +			wake_up(&qp->wait); +	}  }  static void update_sge(struct ipath_sge_state *ss, u32 length) @@ -1012,13 +1020,24 @@ static void sdma_complete(void *cookie, int status)  	struct ipath_verbs_txreq *tx = cookie;  	struct ipath_qp *qp = tx->qp;  	struct ipath_ibdev *dev = to_idev(qp->ibqp.device); +	unsigned int flags; +	enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? +		IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; -	/* Generate a completion queue entry if needed */ -	if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) { -		enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? -			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; - +	if (atomic_dec_and_test(&qp->s_dma_busy)) { +		spin_lock_irqsave(&qp->s_lock, flags); +		if (tx->wqe) +			ipath_send_complete(qp, tx->wqe, ibs); +		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && +		     qp->s_last != qp->s_head) || +		    (qp->s_flags & IPATH_S_WAIT_DMA)) +			ipath_schedule_send(qp); +		spin_unlock_irqrestore(&qp->s_lock, flags); +		wake_up(&qp->wait_dma); +	} else if (tx->wqe) { +		spin_lock_irqsave(&qp->s_lock, flags);  		ipath_send_complete(qp, tx->wqe, ibs); +		spin_unlock_irqrestore(&qp->s_lock, flags);  	}  	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) @@ -1029,6 +1048,21 @@ static void sdma_complete(void *cookie, int status)  		wake_up(&qp->wait);  } +static void decrement_dma_busy(struct ipath_qp *qp) +{ +	unsigned int flags; + +	if (atomic_dec_and_test(&qp->s_dma_busy)) { +		spin_lock_irqsave(&qp->s_lock, flags); +		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && +		     qp->s_last != qp->s_head) || +		    (qp->s_flags & IPATH_S_WAIT_DMA)) +			ipath_schedule_send(qp); +		spin_unlock_irqrestore(&qp->s_lock, flags); +		wake_up(&qp->wait_dma); +	} +} +  /*   * Compute the number of clock cycles of delay before sending the next packet.   
* The multipliers reflect the number of clocks for the fastest rate so @@ -1067,9 +1101,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,  	if (tx) {  		qp->s_tx = NULL;  		/* resend previously constructed packet */ +		atomic_inc(&qp->s_dma_busy);  		ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); -		if (ret) +		if (ret) {  			qp->s_tx = tx; +			decrement_dma_busy(qp); +		}  		goto bail;  	} @@ -1120,12 +1157,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,  		tx->txreq.sg_count = ndesc;  		tx->map_len = (hdrwords + 2) << 2;  		tx->txreq.map_addr = &tx->hdr; +		atomic_inc(&qp->s_dma_busy);  		ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);  		if (ret) {  			/* save ss and length in dwords */  			tx->ss = ss;  			tx->len = dwords;  			qp->s_tx = tx; +			decrement_dma_busy(qp);  		}  		goto bail;  	} @@ -1146,6 +1185,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,  	memcpy(piobuf, hdr, hdrwords << 2);  	ipath_copy_from_sge(piobuf + hdrwords, ss, len); +	atomic_inc(&qp->s_dma_busy);  	ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);  	/*  	 * If we couldn't queue the DMA request, save the info @@ -1156,6 +1196,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,  		tx->ss = NULL;  		tx->len = 0;  		qp->s_tx = tx; +		decrement_dma_busy(qp);  	}  	dev->n_unaligned++;  	goto bail; @@ -1179,6 +1220,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,  	unsigned flush_wc;  	u32 control;  	int ret; +	unsigned int flags;  	piobuf = ipath_getpiobuf(dd, plen, NULL);  	if (unlikely(piobuf == NULL)) { @@ -1249,8 +1291,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,  	}  	copy_io(piobuf, ss, len, flush_wc);  done: -	if (qp->s_wqe) +	if (qp->s_wqe) { +		spin_lock_irqsave(&qp->s_lock, flags);  		ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); +		spin_unlock_irqrestore(&qp->s_lock, flags); +	}  	ret = 0;  bail:  	return ret; @@ -1283,19 +1328,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,  	 * can defer SDMA restart until link goes ACTIVE without  	 * worrying about just how we got there.  	 */ -	if (qp->ibqp.qp_type == IB_QPT_SMI) +	if (qp->ibqp.qp_type == IB_QPT_SMI || +	    !(dd->ipath_flags & IPATH_HAS_SEND_DMA))  		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,  					   plen, dwords); -	/* All non-VL15 packets are dropped if link is not ACTIVE */ -	else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) { -		if (qp->s_wqe) -			ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); -		ret = 0; -	} else if (dd->ipath_flags & IPATH_HAS_SEND_DMA) -		ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, -					   plen, dwords);  	else -		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, +		ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,  					   plen, dwords);  	return ret; @@ -1403,27 +1441,46 @@ bail:   * This is called from ipath_intr() at interrupt level when a PIO buffer is   * available after ipath_verbs_send() returned an error that no buffers were   * available.  Return 1 if we consumed all the PIO buffers and we still have - * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and + * QPs waiting for buffers (for now, just restart the send tasklet and   * return zero).   
*/  int ipath_ib_piobufavail(struct ipath_ibdev *dev)  { +	struct list_head *list; +	struct ipath_qp *qplist;  	struct ipath_qp *qp;  	unsigned long flags;  	if (dev == NULL)  		goto bail; +	list = &dev->piowait; +	qplist = NULL; +  	spin_lock_irqsave(&dev->pending_lock, flags); -	while (!list_empty(&dev->piowait)) { -		qp = list_entry(dev->piowait.next, struct ipath_qp, -				piowait); +	while (!list_empty(list)) { +		qp = list_entry(list->next, struct ipath_qp, piowait);  		list_del_init(&qp->piowait); -		clear_bit(IPATH_S_BUSY, &qp->s_busy); -		tasklet_hi_schedule(&qp->s_task); +		qp->pio_next = qplist; +		qplist = qp; +		atomic_inc(&qp->refcount);  	}  	spin_unlock_irqrestore(&dev->pending_lock, flags); +	while (qplist != NULL) { +		qp = qplist; +		qplist = qp->pio_next; + +		spin_lock_irqsave(&qp->s_lock, flags); +		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) +			ipath_schedule_send(qp); +		spin_unlock_irqrestore(&qp->s_lock, flags); + +		/* Notify ipath_destroy_qp() if it is waiting. */ +		if (atomic_dec_and_test(&qp->refcount)) +			wake_up(&qp->wait); +	} +  bail:  	return 0;  } @@ -2145,11 +2202,12 @@ bail:  void ipath_unregister_ib_device(struct ipath_ibdev *dev)  {  	struct ib_device *ibdev = &dev->ibdev; - -	disable_timer(dev->dd); +	u32 qps_inuse;  	ib_unregister_device(ibdev); +	disable_timer(dev->dd); +  	if (!list_empty(&dev->pending[0]) ||  	    !list_empty(&dev->pending[1]) ||  	    !list_empty(&dev->pending[2])) @@ -2164,7 +2222,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev)  	 * Note that ipath_unregister_ib_device() can be called before all  	 * the QPs are destroyed!  	 */ -	ipath_free_all_qps(&dev->qp_table); +	qps_inuse = ipath_free_all_qps(&dev->qp_table); +	if (qps_inuse) +		ipath_dev_err(dev->dd, "QP memory leak! 
%u still in use\n", +			qps_inuse);  	kfree(dev->qp_table.table);  	kfree(dev->lk_table.table);  	kfree(dev->txreq_bufs); @@ -2215,17 +2276,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr,  		      "RC OTH NAKs %d\n"  		      "RC timeouts %d\n"  		      "RC RDMA dup %d\n" -		      "RC stalls   %d\n"  		      "piobuf wait %d\n" -		      "no piobuf   %d\n"  		      "unaligned   %d\n"  		      "PKT drops   %d\n"  		      "WQE errs    %d\n",  		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,  		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,  		      dev->n_other_naks, dev->n_timeouts, -		      dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, -		      dev->n_no_piobuf, dev->n_unaligned, +		      dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,  		      dev->n_pkt_drops, dev->n_wqe_errs);  	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {  		const struct ipath_opcode_stats *si = &dev->opstats[i]; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 6514aa8306cd..9d12ae8a778e 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -74,6 +74,11 @@  #define IPATH_POST_RECV_OK		0x02  #define IPATH_PROCESS_RECV_OK		0x04  #define IPATH_PROCESS_SEND_OK		0x08 +#define IPATH_PROCESS_NEXT_SEND_OK	0x10 +#define IPATH_FLUSH_SEND		0x20 +#define IPATH_FLUSH_RECV		0x40 +#define IPATH_PROCESS_OR_FLUSH_SEND \ +	(IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)  /* IB Performance Manager status values */  #define IB_PMA_SAMPLE_STATUS_DONE	0x00 @@ -353,12 +358,14 @@ struct ipath_qp {  	struct ib_qp ibqp;  	struct ipath_qp *next;		/* link list for QPN hash table */  	struct ipath_qp *timer_next;	/* link list for ipath_ib_timer() */ +	struct ipath_qp *pio_next;	/* link for ipath_ib_piobufavail() */  	struct list_head piowait;	/* link for wait PIO buf */  	struct list_head timerwait;	/* link for waiting for timeouts */  	struct ib_ah_attr remote_ah_attr;  	struct ipath_ib_header s_hdr;	/* next packet header to send */  	atomic_t refcount;  	wait_queue_head_t wait; +	wait_queue_head_t wait_dma;  	struct tasklet_struct s_task;  	struct ipath_mmap_info *ip;  	struct ipath_sge_state *s_cur_sge; @@ -369,7 +376,7 @@ struct ipath_qp {  	struct ipath_sge_state s_rdma_read_sge;  	struct ipath_sge_state r_sge;	/* current receive data */  	spinlock_t s_lock; -	unsigned long s_busy; +	atomic_t s_dma_busy;  	u16 s_pkt_delay;  	u16 s_hdrwords;		/* size of s_hdr in 32 bit words */  	u32 s_cur_size;		/* size of send packet in bytes */ @@ -383,6 +390,7 @@ struct ipath_qp {  	u32 s_rnr_timeout;	/* number of milliseconds for RNR timeout */  	u32 r_ack_psn;		/* PSN for next ACK or atomic ACK */  	u64 r_wr_id;		/* ID for current receive WQE */ +	unsigned long r_aflags;  	u32 r_len;		/* total length of r_sge */  	u32 r_rcv_len;		/* receive data len processed */  	u32 r_psn;		/* expected rcv packet sequence number */ @@ -394,8 +402,7 @@ struct ipath_qp {  	u8 r_state;		/* opcode of last packet received */  	u8 r_nak_state;		/* non-zero if NAK is pending */  	u8 r_min_rnr_timer;	/* retry timeout value for RNR NAKs */ -	u8 r_reuse_sge;		/* for UC receive errors */ -	u8 r_wrid_valid;	/* r_wrid set but CQ entry not yet made */ +	u8 r_flags;  	u8 r_max_rd_atomic;	/* max number of RDMA read/atomic to receive */  	u8 r_head_ack_queue;	/* index into s_ack_queue[] */  	u8 qp_access_flags; @@ -404,13 +411,13 @@ struct ipath_qp {  	u8 s_rnr_retry_cnt;  	u8 s_retry;		/* requester retry 
counter */  	u8 s_rnr_retry;		/* requester RNR retry counter */ -	u8 s_wait_credit;	/* limit number of unacked packets sent */  	u8 s_pkey_index;	/* PKEY index to use */  	u8 s_max_rd_atomic;	/* max number of RDMA read/atomic to send */  	u8 s_num_rd_atomic;	/* number of RDMA read/atomic pending */  	u8 s_tail_ack_queue;	/* index into s_ack_queue[] */  	u8 s_flags;  	u8 s_dmult; +	u8 s_draining;  	u8 timeout;		/* Timeout for this QP */  	enum ib_mtu path_mtu;  	u32 remote_qpn; @@ -428,16 +435,40 @@ struct ipath_qp {  	struct ipath_sge r_sg_list[0];	/* verified SGEs */  }; -/* Bit definition for s_busy. */ -#define IPATH_S_BUSY		0 +/* + * Atomic bit definitions for r_aflags. + */ +#define IPATH_R_WRID_VALID	0 + +/* + * Bit definitions for r_flags. + */ +#define IPATH_R_REUSE_SGE	0x01 +#define IPATH_R_RDMAR_SEQ	0x02  /*   * Bit definitions for s_flags. + * + * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs + *			   before processing the next SWQE + * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs + *			   before processing the next SWQE + * IPATH_S_WAITING - waiting for RNR timeout or send buffer available. + * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating + *		      next send completion entry not via send DMA.   */  #define IPATH_S_SIGNAL_REQ_WR	0x01  #define IPATH_S_FENCE_PENDING	0x02  #define IPATH_S_RDMAR_PENDING	0x04  #define IPATH_S_ACK_PENDING	0x08 +#define IPATH_S_BUSY		0x10 +#define IPATH_S_WAITING		0x20 +#define IPATH_S_WAIT_SSN_CREDIT	0x40 +#define IPATH_S_WAIT_DMA	0x80 + +#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \ +	IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)  #define IPATH_PSN_CREDIT	512 @@ -573,13 +604,11 @@ struct ipath_ibdev {  	u32 n_rnr_naks;  	u32 n_other_naks;  	u32 n_timeouts; -	u32 n_rc_stalls;  	u32 n_pkt_drops;  	u32 n_vl15_dropped;  	u32 n_wqe_errs;  	u32 n_rdma_dup_busy;  	u32 n_piowait; -	u32 n_no_piobuf;  	u32 n_unaligned;  	u32 port_cap_flags;  	u32 pma_sample_start; @@ -657,6 +686,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)  	return container_of(ibdev, struct ipath_ibdev, ibdev);  } +/* + * This must be called with s_lock held. 
+ */ +static inline void ipath_schedule_send(struct ipath_qp *qp) +{ +	if (qp->s_flags & IPATH_S_ANY_WAIT) +		qp->s_flags &= ~IPATH_S_ANY_WAIT; +	if (!(qp->s_flags & IPATH_S_BUSY)) +		tasklet_hi_schedule(&qp->s_task); +} +  int ipath_process_mad(struct ib_device *ibdev,  		      int mad_flags,  		      u8 port_num, @@ -706,12 +746,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,  int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,  		   int attr_mask, struct ib_qp_init_attr *init_attr); -void ipath_free_all_qps(struct ipath_qp_table *qpt); +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);  int ipath_init_qp_table(struct ipath_ibdev *idev, int size); -void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc); -  void ipath_get_credit(struct ipath_qp *qp, u32 aeth);  unsigned ipath_ib_rate_to_mult(enum ib_rate rate); @@ -729,7 +767,9 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,  		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp); -void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc); +void ipath_restart_rc(struct ipath_qp *qp, u32 psn); + +void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);  int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr); | 
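
Notes on the conventions this patch introduces. The reworked ib_ipath_state_ops[] turns per-state decisions into mask tests: each QP state maps to a set of capability bits, and callers simply test the bit for the operation they are about to perform (post, process, or flush). A standalone C model of the lookup, with simplified states and flag values rather than the driver's actual table:

#include <stdio.h>

enum qps { QPS_RESET, QPS_RTS, QPS_SQE, QPS_ERR, QPS_MAX };

#define PROCESS_SEND_OK		0x01
#define FLUSH_SEND		0x02
#define PROCESS_OR_FLUSH_SEND	(PROCESS_SEND_OK | FLUSH_SEND)

static const int state_ops[QPS_MAX] = {
	[QPS_RESET] = 0,
	[QPS_RTS]   = PROCESS_SEND_OK,
	[QPS_SQE]   = FLUSH_SEND,	/* pending sends complete in error */
	[QPS_ERR]   = FLUSH_SEND,
};

int main(void)
{
	enum qps state = QPS_SQE;

	/* ipath_send_complete()-style guard: run for send OR flush. */
	if (state_ops[state] & PROCESS_OR_FLUSH_SEND)
		printf("completion processing allowed\n");
	/* ipath_make_*_req()-style guard: no new packets, flush only. */
	if (!(state_ops[state] & PROCESS_SEND_OK))
		printf("no new packets may be generated\n");
	return 0;
}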
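
The send side of a QP is now in exactly one of three regimes, all tracked in s_flags under s_lock: running (IPATH_S_BUSY), parked on a wait condition (one of the IPATH_S_ANY_WAIT bits), or idle. ipath_schedule_send() clears the wait bits and only kicks the tasklet when the QP is not already running, so a QP can never be double-scheduled. A minimal single-threaded model of that handshake, with locking elided and flag values mirroring ipath_verbs.h:

#include <stdio.h>

#define S_BUSY		0x10
#define S_WAITING	0x20
#define S_WAIT_DMA	0x80
#define S_ANY_WAIT	(S_WAITING | S_WAIT_DMA)

struct qp {
	unsigned s_flags;	/* protected by s_lock in the driver */
	int tasklet_runs;
};

/* Model of ipath_schedule_send(); the caller holds s_lock. */
static void schedule_send(struct qp *qp)
{
	qp->s_flags &= ~S_ANY_WAIT;	/* wait condition has resolved */
	if (!(qp->s_flags & S_BUSY))	/* tasklet not already running */
		qp->tasklet_runs++;
}

int main(void)
{
	struct qp qp = { .s_flags = S_WAITING, .tasklet_runs = 0 };

	schedule_send(&qp);	/* waiting and idle: gets scheduled */
	qp.s_flags |= S_BUSY;
	schedule_send(&qp);	/* already running: no double schedule */
	printf("runs=%d flags=%#x\n", qp.tasklet_runs, qp.s_flags);
	return 0;
}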
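
ipath_send_complete() also repairs the other send-queue indices when it retires the oldest WQE: if s_cur or s_tail still point at the retired slot, as they do when un-started work is being flushed, they advance together with s_last, and an SQD drain is finished once s_cur catches up. A self-contained sketch of the index bookkeeping; note the driver tests qp->state == IB_QPS_SQD where this model uses a plain s_draining flag:

#include <stdio.h>

struct sq {
	unsigned s_size, s_last, s_cur, s_tail;
	int s_draining;
};

/* Index bookkeeping at the end of ipath_send_complete(), modeled. */
static void retire_oldest(struct sq *q)
{
	unsigned old_last = q->s_last;
	unsigned last = (old_last + 1 >= q->s_size) ? 0 : old_last + 1;

	q->s_last = last;
	if (q->s_cur == old_last)	/* WQE flushed before it started */
		q->s_cur = last;
	if (q->s_tail == old_last)
		q->s_tail = last;
	if (q->s_draining && last == q->s_cur)	/* SQD drain finished */
		q->s_draining = 0;
}

int main(void)
{
	struct sq q = { .s_size = 4, .s_last = 3, .s_cur = 3,
			.s_tail = 3, .s_draining = 1 };

	retire_oldest(&q);
	printf("last=%u cur=%u tail=%u draining=%d\n",
	       q.s_last, q.s_cur, q.s_tail, q.s_draining);
	return 0;
}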
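
The new s_dma_busy counter is a reference count on in-flight SDMA descriptors: ipath_verbs_send_dma() increments it before each ipath_sdma_verbs_send(), and it is dropped either on the submit error path (decrement_dma_busy()) or in sdma_complete(). Only the caller that drops it to zero performs the deferred flush work and wakes sleepers on wait_dma. A user-space approximation with C11 atomics standing in for the kernel's atomic_t; the function names here are hypothetical:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int s_dma_busy;

/* Taken before handing a descriptor to the DMA engine. */
static void submit_descriptor(void)
{
	atomic_fetch_add(&s_dma_busy, 1);
	/* ...queue the descriptor; on failure, drop the reference... */
}

/* Returns true only for the caller that retired the last descriptor. */
static bool retire_descriptor(void)
{
	if (atomic_fetch_sub(&s_dma_busy, 1) == 1) {
		/* queue drained: safe to flush WQEs and wake waiters */
		return true;
	}
	return false;
}

int main(void)
{
	submit_descriptor();
	submit_descriptor();
	printf("%d\n", retire_descriptor());	/* 0: one still in flight */
	printf("%d\n", retire_descriptor());	/* 1: last one, do wakeups */
	return 0;
}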
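
Both ipath_ib_timer() and ipath_ib_piobufavail() drain their shared lists in two phases: first, with pending_lock held, unlink each QP onto a private singly linked list (timer_next/pio_next) and take a reference; then, with no list lock held, reschedule each QP under its own s_lock and drop the reference last, so ipath_destroy_qp() cannot free a QP mid-walk. The shape of that pattern, with reference counting reduced to a plain integer for illustration:

#include <stdio.h>

struct qp {
	struct qp *next;	/* private drain-list linkage */
	int refcount;
	const char *name;
};

int main(void)
{
	struct qp a = { .refcount = 1, .name = "qp-a" };
	struct qp b = { .refcount = 1, .name = "qp-b" };
	struct qp *shared[] = { &a, &b };	/* stands in for dev->piowait */
	struct qp *drain = NULL;

	/* Phase 1: under pending_lock, unlink and pin each QP. */
	for (int i = 0; i < 2; i++) {
		shared[i]->next = drain;
		drain = shared[i];
		drain->refcount++;
	}

	/* Phase 2: no list lock held; work on each QP, then unpin. */
	while (drain) {
		struct qp *qp = drain;

		drain = qp->next;
		/* take qp->s_lock, ipath_schedule_send(qp), drop s_lock */
		printf("rescheduled %s\n", qp->name);
		if (--qp->refcount == 0)
			printf("would wake ipath_destroy_qp() waiter\n");
	}
	return 0;
}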
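
Receive-side booleans are likewise split by locking rules: r_flags (IPATH_R_REUSE_SGE, IPATH_R_RDMAR_SEQ) is only touched under locks the receive path already holds, while r_aflags holds IPATH_R_WRID_VALID, which is claimed with test_and_clear_bit() so that exactly one of the competing paths generates the receive completion. A small model of that atomic claim using C11 atomics in place of test_and_clear_bit():

#include <stdatomic.h>
#include <stdio.h>

#define R_WRID_VALID	0x1UL

static atomic_ulong r_aflags;

/* Model of test_and_clear_bit(): only one caller sees the bit set. */
static int claim_wrid(void)
{
	unsigned long old = atomic_fetch_and(&r_aflags, ~R_WRID_VALID);

	return (old & R_WRID_VALID) != 0;
}

int main(void)
{
	atomic_store(&r_aflags, R_WRID_VALID);	/* set when the rwqe is consumed */
	printf("first claim:  %d\n", claim_wrid());	/* 1: emit the CQ entry */
	printf("second claim: %d\n", claim_wrid());	/* 0: already claimed */
	return 0;
}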
