From 2ba05622b8b143b0c95968ba59bddfbd6d2f2559 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:14 -0700 Subject: dmaengine: provide a common 'issue_pending_all' implementation async_tx and net_dma each have open-coded versions of issue_pending_all, so provide a common routine in dmaengine. The implementation needs to walk the global device list, so implement rcu to allow dma_issue_pending_all to run lockless. Clients protect themselves from channel removal events by holding a dmaengine reference. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- net/core/dev.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 09c66a449da6..e40b0d57f8ff 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2635,14 +2635,7 @@ out: * There may not be any more sk_buffs coming right now, so push * any pending DMA copies to hardware */ - if (!cpus_empty(net_dma.channel_mask)) { - int chan_idx; - for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) { - struct dma_chan *chan = net_dma.channels[chan_idx]; - if (chan) - dma_async_memcpy_issue_pending(chan); - } - } + dma_issue_pending_all(); #endif return; -- cgit v1.2.3 From f67b45999205164958de4ec0658d51fa4bee066d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:15 -0700 Subject: net_dma: convert to dma_find_channel Use the general-purpose channel allocation provided by dmaengine. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- net/core/dev.c | 40 ---------------------------------------- 1 file changed, 40 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index e40b0d57f8ff..bbb07dbe1740 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4827,44 +4827,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, } #ifdef CONFIG_NET_DMA -/** - * net_dma_rebalance - try to maintain one DMA channel per CPU - * @net_dma: DMA client and associated data (lock, channels, channel_mask) - * - * This is called when the number of channels allocated to the net_dma client - * changes. The net_dma client tries to have one DMA channel per CPU. - */ - -static void net_dma_rebalance(struct net_dma *net_dma) -{ - unsigned int cpu, i, n, chan_idx; - struct dma_chan *chan; - - if (cpus_empty(net_dma->channel_mask)) { - for_each_online_cpu(cpu) - rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); - return; - } - - i = 0; - cpu = first_cpu(cpu_online_map); - - for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) { - chan = net_dma->channels[chan_idx]; - - n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) - + (i < (num_online_cpus() % - cpus_weight(net_dma->channel_mask)) ? 1 : 0)); - - while(n) { - per_cpu(softnet_data, cpu).net_dma = chan; - cpu = next_cpu(cpu, cpu_online_map); - n--; - } - i++; - } -} - /** * netdev_dma_event - event callback for the net_dma_client * @client: should always be net_dma_client @@ -4894,7 +4856,6 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan, ack = DMA_ACK; net_dma->channels[pos] = chan; cpu_set(pos, net_dma->channel_mask); - net_dma_rebalance(net_dma); } break; case DMA_RESOURCE_REMOVED: @@ -4909,7 +4870,6 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan, ack = DMA_ACK; cpu_clear(pos, net_dma->channel_mask); net_dma->channels[i] = NULL; - net_dma_rebalance(net_dma); } break; default: -- cgit v1.2.3 From 209b84a88fe81341b4d8d465acc4a67cb7c3feb3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:17 -0700 Subject: dmaengine: replace dma_async_client_register with dmaengine_get Now that clients no longer need to be notified of channel arrival dma_async_client_register can simply increment the dmaengine_ref_count. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- net/core/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index bbb07dbe1740..7596fc9403c8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4894,8 +4894,7 @@ static int __init netdev_dma_register(void) } spin_lock_init(&net_dma.lock); dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); - dma_async_client_register(&net_dma.client); - dma_async_client_chan_request(&net_dma.client); + dmaengine_get(); return 0; } -- cgit v1.2.3 From aa1e6f1a385eb2b04171ec841f3b760091e4a8ee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jan 2009 11:38:17 -0700 Subject: dmaengine: kill struct dma_client and supporting infrastructure All users have been converted to either the general-purpose allocator, dma_find_channel, or dma_request_channel. Reviewed-by: Andrew Morton Signed-off-by: Dan Williams --- net/core/dev.c | 99 ++-------------------------------------------------------- 1 file changed, 3 insertions(+), 96 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 7596fc9403c8..ac55d84d6255 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -167,25 +167,6 @@ static DEFINE_SPINLOCK(ptype_lock); static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; static struct list_head ptype_all __read_mostly; /* Taps */ -#ifdef CONFIG_NET_DMA -struct net_dma { - struct dma_client client; - spinlock_t lock; - cpumask_t channel_mask; - struct dma_chan **channels; -}; - -static enum dma_state_client -netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_state state); - -static struct net_dma net_dma = { - .client = { - .event_callback = netdev_dma_event, - }, -}; -#endif - /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. @@ -4826,81 +4807,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -#ifdef CONFIG_NET_DMA -/** - * netdev_dma_event - event callback for the net_dma_client - * @client: should always be net_dma_client - * @chan: DMA channel for the event - * @state: DMA state to be handled - */ -static enum dma_state_client -netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_state state) -{ - int i, found = 0, pos = -1; - struct net_dma *net_dma = - container_of(client, struct net_dma, client); - enum dma_state_client ack = DMA_DUP; /* default: take no action */ - - spin_lock(&net_dma->lock); - switch (state) { - case DMA_RESOURCE_AVAILABLE: - for (i = 0; i < nr_cpu_ids; i++) - if (net_dma->channels[i] == chan) { - found = 1; - break; - } else if (net_dma->channels[i] == NULL && pos < 0) - pos = i; - - if (!found && pos >= 0) { - ack = DMA_ACK; - net_dma->channels[pos] = chan; - cpu_set(pos, net_dma->channel_mask); - } - break; - case DMA_RESOURCE_REMOVED: - for (i = 0; i < nr_cpu_ids; i++) - if (net_dma->channels[i] == chan) { - found = 1; - pos = i; - break; - } - - if (found) { - ack = DMA_ACK; - cpu_clear(pos, net_dma->channel_mask); - net_dma->channels[i] = NULL; - } - break; - default: - break; - } - spin_unlock(&net_dma->lock); - - return ack; -} - -/** - * netdev_dma_register - register the networking subsystem as a DMA client - */ -static int __init netdev_dma_register(void) -{ - net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma), - GFP_KERNEL); - if (unlikely(!net_dma.channels)) { - printk(KERN_NOTICE - "netdev_dma: no memory for net_dma.channels\n"); - return -ENOMEM; - } - spin_lock_init(&net_dma.lock); - dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); - dmaengine_get(); - return 0; -} - -#else -static int __init netdev_dma_register(void) { return -ENODEV; } -#endif /* CONFIG_NET_DMA */ /** * netdev_increment_features - increment feature set by one @@ -5120,14 +5026,15 @@ static int __init net_dev_init(void) if (register_pernet_device(&default_device_ops)) goto out; - netdev_dma_register(); - open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action); hotcpu_notifier(dev_cpu_callback, 0); dst_init(); dev_mcast_init(); + #ifdef CONFIG_NET_DMA + dmaengine_get(); + #endif rc = 0; out: return rc; -- cgit v1.2.3 From 649274d993212e7c23c0cb734572c2311c200872 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 11 Jan 2009 00:20:39 -0800 Subject: net_dma: acquire/release dma channels on ifup/ifdown The recent dmaengine rework removed the capability to remove dma device driver modules while net_dma is active. Rather than notify dmaengine-clients that channels are trying to be removed, we now rely on clients to notify dmaengine when they no longer have a need for channels. Teach net_dma to release channels by taking dmaengine references at netdevice open and dropping references at netdevice close. Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams Signed-off-by: David S. Miller --- net/core/dev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 5f736f1ceeae..b715a55cccc4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1087,6 +1087,11 @@ int dev_open(struct net_device *dev) */ dev->flags |= IFF_UP; + /* + * Enable NET_DMA + */ + dmaengine_get(); + /* * Initialize multicasting status */ @@ -1164,6 +1169,11 @@ int dev_close(struct net_device *dev) */ call_netdevice_notifiers(NETDEV_DOWN, dev); + /* + * Shutdown NET_DMA + */ + dmaengine_put(); + return 0; } @@ -5151,9 +5161,6 @@ static int __init net_dev_init(void) hotcpu_notifier(dev_cpu_callback, 0); dst_init(); dev_mcast_init(); - #ifdef CONFIG_NET_DMA - dmaengine_get(); - #endif rc = 0; out: return rc; -- cgit v1.2.3 From f17f5c91ae3bfeb5cfc37fa132a5fdfceb8927be Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Jan 2009 14:36:12 -0800 Subject: gro: Check for GSO packets and packets with frag_list As GRO cannot be applied to packets with frag_list we need to make sure that we reject such packets if they are fed to us, e.g., through a tunnel device. Also there is no point in applying GRO on GSO packets so they too should be rejected. This allows GRO to be used in virtio-net which may produce GSO packets directly but may still benefit from GRO if the other end of it doesn't support GSO. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b715a55cccc4..7dec715293b1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2392,6 +2392,9 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (!(skb->dev->features & NETIF_F_GRO)) goto normal; + if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list) + goto normal; + rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { struct sk_buff *p; -- cgit v1.2.3 From f557206800801410c30e53ce7a27219b2c4cf0ba Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Jan 2009 20:40:03 -0800 Subject: gro: Fix page ref count for skbs freed normally When an skb with page frags is merged into an existing one, we cannibalise its reference count. This is OK when the skb is reused because we set nr_frags to zero in that case. However, for the case where the skb is freed through kfree_skb, we didn't clear nr_frags which causes the page to be freed prematurely. This is fixed by moving the skb resetting into skb_gro_receive. Reported-by: Jeff Kirsher Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 7dec715293b1..60377b6c0a80 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2491,12 +2491,6 @@ EXPORT_SYMBOL(napi_gro_receive); void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { - skb_shinfo(skb)->nr_frags = 0; - - skb->len -= skb->data_len; - skb->truesize -= skb->data_len; - skb->data_len = 0; - __skb_pull(skb, skb_headlen(skb)); skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); -- cgit v1.2.3 From 937f1ba56b4be37d9e2ad77412f95048662058d2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Jan 2009 21:05:05 -0800 Subject: net: Add init_dummy_netdev() and fix EMAC driver using it This adds an init_dummy_netdev() function that gets a network device structure (allocation and lifetime entirely under caller's control) and initialize the minimum amount of fields so it can be used to schedule NAPI polls without registering a full blown interface. This is to be used by drivers that need to tie several hardware interfaces to a single NAPI poll scheduler due to HW limitations. It also updates the ibm_newemac driver to use that, this fixing the oops on 2.6.29 due to passing NULL as "dev" to netif_napi_add() Symbol is exported GPL only a I don't think we want binary drivers doing that sort of acrobatics (if we want them at all). Signed-off-by: Benjamin Herrenschmidt Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/core/dev.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 60377b6c0a80..8d675975d85b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4430,6 +4430,45 @@ err_uninit: goto out; } +/** + * init_dummy_netdev - init a dummy network device for NAPI + * @dev: device to init + * + * This takes a network device structure and initialize the minimum + * amount of fields so it can be used to schedule NAPI polls without + * registering a full blown interface. This is to be used by drivers + * that need to tie several hardware interfaces to a single NAPI + * poll scheduler due to HW limitations. + */ +int init_dummy_netdev(struct net_device *dev) +{ + /* Clear everything. Note we don't initialize spinlocks + * are they aren't supposed to be taken by any of the + * NAPI code and this dummy netdev is supposed to be + * only ever used for NAPI polls + */ + memset(dev, 0, sizeof(struct net_device)); + + /* make sure we BUG if trying to hit standard + * register/unregister code path + */ + dev->reg_state = NETREG_DUMMY; + + /* initialize the ref count */ + atomic_set(&dev->refcnt, 1); + + /* NAPI wants this */ + INIT_LIST_HEAD(&dev->napi_list); + + /* a dummy interface is started by default */ + set_bit(__LINK_STATE_PRESENT, &dev->state); + set_bit(__LINK_STATE_START, &dev->state); + + return 0; +} +EXPORT_SYMBOL_GPL(init_dummy_netdev); + + /** * register_netdev - register a network device * @dev: device to register -- cgit v1.2.3 From 67fd1a731ff1a990d4da7689909317756e50cb4d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Jan 2009 16:26:44 -0800 Subject: net: Add debug info to track down GSO checksum bug I'm trying to track down why people're hitting the checksum warning in skb_gso_segment. As the problem seems to be hitting lots of people and I can't reproduce it or locate the bug, here is a patch to print out more details which hopefully should help us to track this down. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 8d675975d85b..6e44c3277101 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1534,7 +1534,19 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) { + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + struct net_device *dev = skb->dev; + struct ethtool_drvinfo info = {}; + + if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) + dev->ethtool_ops->get_drvinfo(dev, &info); + + WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " + "ip_summed=%d", + info.driver, dev ? dev->features : 0L, + skb->sk ? skb->sk->sk_route_caps : 0L, + skb->len, skb->data_len, skb->ip_summed); + if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); -- cgit v1.2.3 From 9a8e47ffd95608f0768e1a8a0225c822aa53aa9b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 17 Jan 2009 19:47:18 +0000 Subject: gro: Fix error handling on extremely short frags When a frag is shorter than an Ethernet header, we'd return a zeroed packet instead of aborting. This patch fixes that. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 6e44c3277101..5379b0c1190a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2536,6 +2536,7 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, if (!pskb_may_pull(skb, ETH_HLEN)) { napi_reuse_skb(napi, skb); + skb = NULL; goto out; } -- cgit v1.2.3