[NET]: Make NAPI polling independent of struct net_device objects.

Several devices have multiple independant RX queues per net device, and some have a single interrupt doorbell for several queues. In either case, it's easier to support layouts like that if the structure representing the poll is independant from the net device itself. The signature of the ->poll() call back goes from: int foo_poll(struct net_device *dev, int *budget) to int foo_poll(struct napi_struct *napi, int budget) The caller is returned the number of RX packets processed (or the number of "NAPI credits" consumed if you want to get abstract). The callee no longer messes around bumping dev->quota, *budget, etc. because that is all handled in the caller upon return. The napi_struct is to be embedded in the device driver private data structures. Furthermore, it is the driver's responsibility to disable all NAPI instances in it's ->stop() device close handler. Since the napi_struct is privatized into the driver's private data structures, only the driver knows how to get at all of the napi_struct instances it may have per-device. With lots of help and suggestions from Rusty Russell, Roland Dreier, Michael Chan, Jeff Garzik, and Jamal Hadi Salim. Bug fixes from Thomas Graf, Roland Dreier, Peter Zijlstra, Joseph Fannin, Scott Wood, Hans J. Koch, and Michael Chan. [ Ported to current tree and all drivers converted. Integrated Stephen's follow-on kerneldoc additions, and restored poll_list handling to the old style to fix mutual exclusion issues. -DaveM ] Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Stephen Hemminger <shemminger@linux-foundation.org> 2007-10-03 16:41:36 -0700
committer: David S. Miller <davem@sunset.davemloft.net> 2007-10-10 16:47:45 -0700
commit: bea3348eef27e6044b6161fd04c3152215f96411 (patch)
tree: f0990b263e5ce42505d290a4c346fe990bcd4c33 /drivers/net/cxgb3/sge.c
parent: dde4e47e8fe333a5649a3fa0e7db1fa7c08d6158 (diff)
1 files changed, 68 insertions, 102 deletions
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 58a5f60521ed..069c1aca8a6b 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -591,9 +591,6 @@ void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 				  q->rspq.desc, q->rspq.phys_addr);
 	}
 
-	if (q->netdev)
-		q->netdev->atalk_ptr = NULL;
-
 	memset(q, 0, sizeof(*q));
 }
 
@@ -1074,7 +1071,7 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	unsigned int ndesc, pidx, credits, gen, compl;
 	const struct port_info *pi = netdev_priv(dev);
 	struct adapter *adap = pi->adapter;
-	struct sge_qset *qs = dev2qset(dev);
+	struct sge_qset *qs = pi->qs;
 	struct sge_txq *q = &qs->txq[TXQ_ETH];
 
 	/*
@@ -1326,13 +1323,12 @@ static void restart_ctrlq(unsigned long data)
 	struct sk_buff *skb;
 	struct sge_qset *qs = (struct sge_qset *)data;
 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
-	const struct port_info *pi = netdev_priv(qs->netdev);
-	struct adapter *adap = pi->adapter;
 
 	spin_lock(&q->lock);
       again:reclaim_completed_tx_imm(q);
 
-	while (q->in_use < q->size && (skb = __skb_dequeue(&q->sendq)) != NULL) {
+	while (q->in_use < q->size &&
+	       (skb = __skb_dequeue(&q->sendq)) != NULL) {
 
 		write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
 
@@ -1354,7 +1350,7 @@ static void restart_ctrlq(unsigned long data)
 	}
 
 	spin_unlock(&q->lock);
-	t3_write_reg(adap, A_SG_KDOORBELL,
+	t3_write_reg(qs->adap, A_SG_KDOORBELL,
 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 }
 
@@ -1638,8 +1634,7 @@ static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
 	else {
 		struct sge_qset *qs = rspq_to_qset(q);
 
-		if (__netif_rx_schedule_prep(qs->netdev))
-			__netif_rx_schedule(qs->netdev);
+		napi_schedule(&qs->napi);
 		q->rx_head = skb;
 	}
 	q->rx_tail = skb;
@@ -1675,34 +1670,30 @@ static inline void deliver_partial_bundle(struct t3cdev *tdev,
  *	receive handler.  Batches need to be of modest size as we do prefetches
  *	on the packets in each.
  */
-static int ofld_poll(struct net_device *dev, int *budget)
+static int ofld_poll(struct napi_struct *napi, int budget)
 {
-	const struct port_info *pi = netdev_priv(dev);
-	struct adapter *adapter = pi->adapter;
-	struct sge_qset *qs = dev2qset(dev);
+	struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
 	struct sge_rspq *q = &qs->rspq;
-	int work_done, limit = min(*budget, dev->quota), avail = limit;
+	struct adapter *adapter = qs->adap;
+	int work_done = 0;
 
-	while (avail) {
+	while (work_done < budget) {
 		struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
 		int ngathered;
 
 		spin_lock_irq(&q->lock);
 		head = q->rx_head;
 		if (!head) {
-			work_done = limit - avail;
-			*budget -= work_done;
-			dev->quota -= work_done;
-			__netif_rx_complete(dev);
+			napi_complete(napi);
 			spin_unlock_irq(&q->lock);
-			return 0;
+			return work_done;
 		}
 
 		tail = q->rx_tail;
 		q->rx_head = q->rx_tail = NULL;
 		spin_unlock_irq(&q->lock);
 
-		for (ngathered = 0; avail && head; avail--) {
+		for (ngathered = 0; work_done < budget && head; work_done++) {
 			prefetch(head->data);
 			skbs[ngathered] = head;
 			head = head->next;
@@ -1724,10 +1715,8 @@ static int ofld_poll(struct net_device *dev, int *budget)
 		}
 		deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
 	}
-	work_done = limit - avail;
-	*budget -= work_done;
-	dev->quota -= work_done;
-	return 1;
+
+	return work_done;
 }
 
 /**
@@ -2071,50 +2060,47 @@ static inline int is_pure_response(const struct rsp_desc *r)
 
 /**
  *	napi_rx_handler - the NAPI handler for Rx processing
- *	@dev: the net device
+ *	@napi: the napi instance
  *	@budget: how many packets we can process in this round
  *
  *	Handler for new data events when using NAPI.
  */
-static int napi_rx_handler(struct net_device *dev, int *budget)
+static int napi_rx_handler(struct napi_struct *napi, int budget)
 {
-	const struct port_info *pi = netdev_priv(dev);
-	struct adapter *adap = pi->adapter;
-	struct sge_qset *qs = dev2qset(dev);
-	int effective_budget = min(*budget, dev->quota);
-
-	int work_done = process_responses(adap, qs, effective_budget);
-	*budget -= work_done;
-	dev->quota -= work_done;
+	struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
+	struct adapter *adap = qs->adap;
+	int work_done = process_responses(adap, qs, budget);
 
-	if (work_done >= effective_budget)
-		return 1;
-
-	netif_rx_complete(dev);
+	if (likely(work_done < budget)) {
+		napi_complete(napi);
 
-	/*
-	 * Because we don't atomically flush the following write it is
-	 * possible that in very rare cases it can reach the device in a way
-	 * that races with a new response being written plus an error interrupt
-	 * causing the NAPI interrupt handler below to return unhandled status
-	 * to the OS.  To protect against this would require flushing the write
-	 * and doing both the write and the flush with interrupts off.  Way too
-	 * expensive and unjustifiable given the rarity of the race.
-	 *
-	 * The race cannot happen at all with MSI-X.
-	 */
-	t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
-		     V_NEWTIMER(qs->rspq.next_holdoff) |
-		     V_NEWINDEX(qs->rspq.cidx));
-	return 0;
+		/*
+		 * Because we don't atomically flush the following
+		 * write it is possible that in very rare cases it can
+		 * reach the device in a way that races with a new
+		 * response being written plus an error interrupt
+		 * causing the NAPI interrupt handler below to return
+		 * unhandled status to the OS.  To protect against
+		 * this would require flushing the write and doing
+		 * both the write and the flush with interrupts off.
+		 * Way too expensive and unjustifiable given the
+		 * rarity of the race.
+		 *
+		 * The race cannot happen at all with MSI-X.
+		 */
+		t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
+			     V_NEWTIMER(qs->rspq.next_holdoff) |
+			     V_NEWINDEX(qs->rspq.cidx));
+	}
+	return work_done;
 }
 
 /*
  * Returns true if the device is already scheduled for polling.
  */
-static inline int napi_is_scheduled(struct net_device *dev)
+static inline int napi_is_scheduled(struct napi_struct *napi)
 {
-	return test_bit(__LINK_STATE_RX_SCHED, &dev->state);
+	return test_bit(NAPI_STATE_SCHED, &napi->state);
 }
 
 /**
@@ -2197,8 +2183,7 @@ static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
 			     V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
 		return 0;
 	}
-	if (likely(__netif_rx_schedule_prep(qs->netdev)))
-		__netif_rx_schedule(qs->netdev);
+	napi_schedule(&qs->napi);
 	return 1;
 }
 
@@ -2209,8 +2194,7 @@ static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
 {
 	struct sge_qset *qs = cookie;
-	const struct port_info *pi = netdev_priv(qs->netdev);
-	struct adapter *adap = pi->adapter;
+	struct adapter *adap = qs->adap;
 	struct sge_rspq *q = &qs->rspq;
 
 	spin_lock(&q->lock);
@@ -2229,13 +2213,11 @@ irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
 irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
 {
 	struct sge_qset *qs = cookie;
-	const struct port_info *pi = netdev_priv(qs->netdev);
-	struct adapter *adap = pi->adapter;
 	struct sge_rspq *q = &qs->rspq;
 
 	spin_lock(&q->lock);
 
-	if (handle_responses(adap, q) < 0)
+	if (handle_responses(qs->adap, q) < 0)
 		q->unhandled_irqs++;
 	spin_unlock(&q->lock);
 	return IRQ_HANDLED;
@@ -2278,11 +2260,13 @@ static irqreturn_t t3_intr_msi(int irq, void *cookie)
 	return IRQ_HANDLED;
 }
 
-static int rspq_check_napi(struct net_device *dev, struct sge_rspq *q)
+static int rspq_check_napi(struct sge_qset *qs)
 {
-	if (!napi_is_scheduled(dev) && is_new_response(&q->desc[q->cidx], q)) {
-		if (likely(__netif_rx_schedule_prep(dev)))
-			__netif_rx_schedule(dev);
+	struct sge_rspq *q = &qs->rspq;
+
+	if (!napi_is_scheduled(&qs->napi) &&
+	    is_new_response(&q->desc[q->cidx], q)) {
+		napi_schedule(&qs->napi);
 		return 1;
 	}
 	return 0;
@@ -2303,10 +2287,9 @@ irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
 
 	spin_lock(&q->lock);
 
-	new_packets = rspq_check_napi(adap->sge.qs[0].netdev, q);
+	new_packets = rspq_check_napi(&adap->sge.qs[0]);
 	if (adap->params.nports == 2)
-		new_packets += rspq_check_napi(adap->sge.qs[1].netdev,
-					       &adap->sge.qs[1].rspq);
+		new_packets += rspq_check_napi(&adap->sge.qs[1]);
 	if (!new_packets && t3_slow_intr_handler(adap) == 0)
 		q->unhandled_irqs++;
 
@@ -2409,9 +2392,9 @@ static irqreturn_t t3b_intr(int irq, void *cookie)
 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
 {
 	u32 map;
-	struct net_device *dev;
 	struct adapter *adap = cookie;
-	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
+	struct sge_qset *qs0 = &adap->sge.qs[0];
+	struct sge_rspq *q0 = &qs0->rspq;
 
 	t3_write_reg(adap, A_PL_CLI, 0);
 	map = t3_read_reg(adap, A_SG_DATA_INTR);
@@ -2424,18 +2407,11 @@ static irqreturn_t t3b_intr_napi(int irq, void *cookie)
 	if (unlikely(map & F_ERRINTR))
 		t3_slow_intr_handler(adap);
 
-	if (likely(map & 1)) {
-		dev = adap->sge.qs[0].netdev;
-
-		if (likely(__netif_rx_schedule_prep(dev)))
-			__netif_rx_schedule(dev);
-	}
-	if (map & 2) {
-		dev = adap->sge.qs[1].netdev;
+	if (likely(map & 1))
+		napi_schedule(&qs0->napi);
 
-		if (likely(__netif_rx_schedule_prep(dev)))
-			__netif_rx_schedule(dev);
-	}
+	if (map & 2)
+		napi_schedule(&adap->sge.qs[1].napi);
 
 	spin_unlock(&q0->lock);
 	return IRQ_HANDLED;
@@ -2514,8 +2490,7 @@ static void sge_timer_cb(unsigned long data)
 {
 	spinlock_t *lock;
 	struct sge_qset *qs = (struct sge_qset *)data;
-	const struct port_info *pi = netdev_priv(qs->netdev);
-	struct adapter *adap = pi->adapter;
+	struct adapter *adap = qs->adap;
 
 	if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
 		reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
@@ -2526,9 +2501,9 @@ static void sge_timer_cb(unsigned long data)
 		spin_unlock(&qs->txq[TXQ_OFLD].lock);
 	}
 	lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
-	    &adap->sge.qs[0].rspq.lock;
+					    &adap->sge.qs[0].rspq.lock;
 	if (spin_trylock_irq(lock)) {
-		if (!napi_is_scheduled(qs->netdev)) {
+		if (!napi_is_scheduled(&qs->napi)) {
 			u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
 
 			if (qs->fl[0].credits < qs->fl[0].size)
@@ -2562,12 +2537,9 @@ static void sge_timer_cb(unsigned long data)
  */
 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
 {
-	if (!qs->netdev)
-		return;
-
 	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
 	qs->rspq.polling = p->polling;
-	qs->netdev->poll = p->polling ? napi_rx_handler : ofld_poll;
+	qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
 }
 
 /**
@@ -2587,7 +2559,7 @@ void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  */
 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 		      int irq_vec_idx, const struct qset_params *p,
-		      int ntxq, struct net_device *netdev)
+		      int ntxq, struct net_device *dev)
 {
 	int i, ret = -ENOMEM;
 	struct sge_qset *q = &adapter->sge.qs[id];
@@ -2708,16 +2680,10 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 	}
 
 	spin_unlock(&adapter->sge.reg_lock);
-	q->netdev = netdev;
-	t3_update_qset_coalesce(q, p);
 
-	/*
-	 * We use atalk_ptr as a backpointer to a qset.  In case a device is
-	 * associated with multiple queue sets only the first one sets
-	 * atalk_ptr.
-	 */
-	if (netdev->atalk_ptr == NULL)
-		netdev->atalk_ptr = q;
+	q->adap = adapter;
+	q->netdev = dev;
+	t3_update_qset_coalesce(q, p);
 
 	refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
 	refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);
author	Stephen Hemminger <shemminger@linux-foundation.org>	2007-10-03 16:41:36 -0700
committer	David S. Miller <davem@sunset.davemloft.net>	2007-10-10 16:47:45 -0700
commit	bea3348eef27e6044b6161fd04c3152215f96411 (patch)
tree	f0990b263e5ce42505d290a4c346fe990bcd4c33 /drivers/net/cxgb3/sge.c
parent	dde4e47e8fe333a5649a3fa0e7db1fa7c08d6158 (diff)