From f17f5c91ae3bfeb5cfc37fa132a5fdfceb8927be Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Jan 2009 14:36:12 -0800 Subject: gro: Check for GSO packets and packets with frag_list As GRO cannot be applied to packets with frag_list we need to make sure that we reject such packets if they are fed to us, e.g., through a tunnel device. Also there is no point in applying GRO on GSO packets so they too should be rejected. This allows GRO to be used in virtio-net which may produce GSO packets directly but may still benefit from GRO if the other end of it doesn't support GSO. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b715a55cccc4..7dec715293b1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2392,6 +2392,9 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (!(skb->dev->features & NETIF_F_GRO)) goto normal; + if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list) + goto normal; + rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { struct sk_buff *p; -- cgit v1.2.3 From f557206800801410c30e53ce7a27219b2c4cf0ba Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Jan 2009 20:40:03 -0800 Subject: gro: Fix page ref count for skbs freed normally When an skb with page frags is merged into an existing one, we cannibalise its reference count. This is OK when the skb is reused because we set nr_frags to zero in that case. However, for the case where the skb is freed through kfree_skb, we didn't clear nr_frags which causes the page to be freed prematurely. This is fixed by moving the skb resetting into skb_gro_receive. Reported-by: Jeff Kirsher Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 7dec715293b1..60377b6c0a80 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2491,12 +2491,6 @@ EXPORT_SYMBOL(napi_gro_receive); void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { - skb_shinfo(skb)->nr_frags = 0; - - skb->len -= skb->data_len; - skb->truesize -= skb->data_len; - skb->data_len = 0; - __skb_pull(skb, skb_headlen(skb)); skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); -- cgit v1.2.3 From 937f1ba56b4be37d9e2ad77412f95048662058d2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Jan 2009 21:05:05 -0800 Subject: net: Add init_dummy_netdev() and fix EMAC driver using it This adds an init_dummy_netdev() function that gets a network device structure (allocation and lifetime entirely under caller's control) and initialize the minimum amount of fields so it can be used to schedule NAPI polls without registering a full blown interface. This is to be used by drivers that need to tie several hardware interfaces to a single NAPI poll scheduler due to HW limitations. It also updates the ibm_newemac driver to use that, this fixing the oops on 2.6.29 due to passing NULL as "dev" to netif_napi_add() Symbol is exported GPL only a I don't think we want binary drivers doing that sort of acrobatics (if we want them at all). Signed-off-by: Benjamin Herrenschmidt Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/core/dev.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 60377b6c0a80..8d675975d85b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4430,6 +4430,45 @@ err_uninit: goto out; } +/** + * init_dummy_netdev - init a dummy network device for NAPI + * @dev: device to init + * + * This takes a network device structure and initialize the minimum + * amount of fields so it can be used to schedule NAPI polls without + * registering a full blown interface. This is to be used by drivers + * that need to tie several hardware interfaces to a single NAPI + * poll scheduler due to HW limitations. + */ +int init_dummy_netdev(struct net_device *dev) +{ + /* Clear everything. Note we don't initialize spinlocks + * are they aren't supposed to be taken by any of the + * NAPI code and this dummy netdev is supposed to be + * only ever used for NAPI polls + */ + memset(dev, 0, sizeof(struct net_device)); + + /* make sure we BUG if trying to hit standard + * register/unregister code path + */ + dev->reg_state = NETREG_DUMMY; + + /* initialize the ref count */ + atomic_set(&dev->refcnt, 1); + + /* NAPI wants this */ + INIT_LIST_HEAD(&dev->napi_list); + + /* a dummy interface is started by default */ + set_bit(__LINK_STATE_PRESENT, &dev->state); + set_bit(__LINK_STATE_START, &dev->state); + + return 0; +} +EXPORT_SYMBOL_GPL(init_dummy_netdev); + + /** * register_netdev - register a network device * @dev: device to register -- cgit v1.2.3 From 67fd1a731ff1a990d4da7689909317756e50cb4d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Jan 2009 16:26:44 -0800 Subject: net: Add debug info to track down GSO checksum bug I'm trying to track down why people're hitting the checksum warning in skb_gso_segment. As the problem seems to be hitting lots of people and I can't reproduce it or locate the bug, here is a patch to print out more details which hopefully should help us to track this down. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 8d675975d85b..6e44c3277101 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1534,7 +1534,19 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) { + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + struct net_device *dev = skb->dev; + struct ethtool_drvinfo info = {}; + + if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) + dev->ethtool_ops->get_drvinfo(dev, &info); + + WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " + "ip_summed=%d", + info.driver, dev ? dev->features : 0L, + skb->sk ? skb->sk->sk_route_caps : 0L, + skb->len, skb->data_len, skb->ip_summed); + if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); -- cgit v1.2.3 From 9a8e47ffd95608f0768e1a8a0225c822aa53aa9b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 17 Jan 2009 19:47:18 +0000 Subject: gro: Fix error handling on extremely short frags When a frag is shorter than an Ethernet header, we'd return a zeroed packet instead of aborting. This patch fixes that. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 6e44c3277101..5379b0c1190a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2536,6 +2536,7 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, if (!pskb_may_pull(skb, ETH_HLEN)) { napi_reuse_skb(napi, skb); + skb = NULL; goto out; } -- cgit v1.2.3 From b4bd07c20ba0c1fa7ad09ba257e0a5cfc2bf6bb3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 6 Feb 2009 22:06:43 -0800 Subject: net_dma: call dmaengine_get only if NET_DMA enabled Based upon a patch from Atsushi Nemoto -------------------- The commit 649274d993212e7c23c0cb734572c2311c200872 ("net_dma: acquire/release dma channels on ifup/ifdown") added unconditional call of dmaengine_get() to net_dma. The API should be called only if NET_DMA was enabled. -------------------- Signed-off-by: David S. Miller Acked-by: Dan Williams --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 5379b0c1190a..a17e00662363 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1090,7 +1090,7 @@ int dev_open(struct net_device *dev) /* * Enable NET_DMA */ - dmaengine_get(); + net_dmaengine_get(); /* * Initialize multicasting status @@ -1172,7 +1172,7 @@ int dev_close(struct net_device *dev) /* * Shutdown NET_DMA */ - dmaengine_put(); + net_dmaengine_put(); return 0; } -- cgit v1.2.3 From 4ead443163b798661c2a2ede5e512e116a9e41e7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 1 Mar 2009 00:11:52 -0800 Subject: netpoll: Add drop checks to all entry points The netpoll entry checks are required to ensure that we don't receive normal packets when invoked via netpoll. Unfortunately it only ever worked for the netif_receive_skb/netif_rx entry points. The VLAN (and subsequently GRO) entry point didn't have the check and therefore can trigger all sorts of weird problems. This patch adds the netpoll check to all entry points. I'm still uneasy with receiving at all under netpoll (which apparently is only used by the out-of-tree kdump code). The reason is it is perfectly legal to receive all data including headers into highmem if netpoll is off, but if you try to do that with netpoll on and someone gets a printk in an IRQ handler you're going to get a nice BUG_ON. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a17e00662363..72b0d26fd46d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2488,6 +2488,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { + if (netpoll_receive_skb(skb)) + return NET_RX_DROP; + switch (__napi_gro_receive(napi, skb)) { case -1: return netif_receive_skb(skb); @@ -2558,6 +2561,9 @@ int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) if (!skb) goto out; + if (netpoll_receive_skb(skb)) + goto out; + err = NET_RX_SUCCESS; switch (__napi_gro_receive(napi, skb)) { -- cgit v1.2.3 From 17edde520927070a6bf14a6a75027c0b843443e5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Feb 2009 00:11:09 -0800 Subject: netns: Remove net_alive It turns out that net_alive is unnecessary, and the original problem that led to it being added was simply that the icmp code thought it was a network device and wound up being unable to handle packets while there were still packets in the network namespace. Now that icmp and tcp have been fixed to properly register themselves this problem is no longer present and we have a stronger guarantee that packets will not arrive in a network namespace then that provided by net_alive in netif_receive_skb. So remove net_alive allowing packet reception run a little faster. Additionally document the strong reason why network namespace cleanup is safe so that if something happens again someone else will have a chance of figuring it out. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/dev.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 72b0d26fd46d..9e4afe650e7a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2267,12 +2267,6 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); - /* Don't receive packets in an exiting network namespace */ - if (!net_alive(dev_net(skb->dev))) { - kfree_skb(skb); - goto out; - } - #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); -- cgit v1.2.3 From 54acd0efab072cb70e87206329d561b297f93bbb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 4 Mar 2009 23:01:02 -0800 Subject: net: Fix missing dev->neigh_setup in register_netdevice(). Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 9e4afe650e7a..2dd484ed3dbb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4339,6 +4339,7 @@ int register_netdevice(struct net_device *dev) dev->do_ioctl = ops->ndo_do_ioctl; dev->set_config = ops->ndo_set_config; dev->change_mtu = ops->ndo_change_mtu; + dev->neigh_setup = ops->ndo_neigh_setup; dev->tx_timeout = ops->ndo_tx_timeout; dev->get_stats = ops->ndo_get_stats; dev->vlan_rx_register = ops->ndo_vlan_rx_register; -- cgit v1.2.3 From 9d40bbda599def1e1d155d7f7dca14fe8744bd2b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 4 Mar 2009 23:46:25 -0800 Subject: vlan: Fix vlan-in-vlan crashes. As analyzed by Patrick McHardy, vlan needs to reset it's netdev_ops pointer in it's ->init() function but this leaves the compat method pointers stale. Add a netdev_resync_ops() and call it from the vlan code. Any other driver which changes ->netdev_ops after register_netdevice() will need to call this new function after doing so too. With help from Patrick McHardy. Tested-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 56 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 22 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 2dd484ed3dbb..f1129706ce7b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4282,6 +4282,39 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } EXPORT_SYMBOL(netdev_fix_features); +/* Some devices need to (re-)set their netdev_ops inside + * ->init() or similar. If that happens, we have to setup + * the compat pointers again. + */ +void netdev_resync_ops(struct net_device *dev) +{ +#ifdef CONFIG_COMPAT_NET_DEV_OPS + const struct net_device_ops *ops = dev->netdev_ops; + + dev->init = ops->ndo_init; + dev->uninit = ops->ndo_uninit; + dev->open = ops->ndo_open; + dev->change_rx_flags = ops->ndo_change_rx_flags; + dev->set_rx_mode = ops->ndo_set_rx_mode; + dev->set_multicast_list = ops->ndo_set_multicast_list; + dev->set_mac_address = ops->ndo_set_mac_address; + dev->validate_addr = ops->ndo_validate_addr; + dev->do_ioctl = ops->ndo_do_ioctl; + dev->set_config = ops->ndo_set_config; + dev->change_mtu = ops->ndo_change_mtu; + dev->neigh_setup = ops->ndo_neigh_setup; + dev->tx_timeout = ops->ndo_tx_timeout; + dev->get_stats = ops->ndo_get_stats; + dev->vlan_rx_register = ops->ndo_vlan_rx_register; + dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = ops->ndo_poll_controller; +#endif +#endif +} +EXPORT_SYMBOL(netdev_resync_ops); + /** * register_netdevice - register a network device * @dev: device to register @@ -4326,28 +4359,7 @@ int register_netdevice(struct net_device *dev) * This is temporary until all network devices are converted. */ if (dev->netdev_ops) { - const struct net_device_ops *ops = dev->netdev_ops; - - dev->init = ops->ndo_init; - dev->uninit = ops->ndo_uninit; - dev->open = ops->ndo_open; - dev->change_rx_flags = ops->ndo_change_rx_flags; - dev->set_rx_mode = ops->ndo_set_rx_mode; - dev->set_multicast_list = ops->ndo_set_multicast_list; - dev->set_mac_address = ops->ndo_set_mac_address; - dev->validate_addr = ops->ndo_validate_addr; - dev->do_ioctl = ops->ndo_do_ioctl; - dev->set_config = ops->ndo_set_config; - dev->change_mtu = ops->ndo_change_mtu; - dev->neigh_setup = ops->ndo_neigh_setup; - dev->tx_timeout = ops->ndo_tx_timeout; - dev->get_stats = ops->ndo_get_stats; - dev->vlan_rx_register = ops->ndo_vlan_rx_register; - dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; - dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; -#ifdef CONFIG_NET_POLL_CONTROLLER - dev->poll_controller = ops->ndo_poll_controller; -#endif + netdev_resync_ops(dev); } else { char drivername[64]; pr_info("%s (%s): not using net_device_ops yet\n", -- cgit v1.2.3