diff options
author | Xinyu Chen <xinyu.chen@freescale.com> | 2012-07-25 16:54:33 +0800 |
---|---|---|
committer | Xinyu Chen <xinyu.chen@freescale.com> | 2012-07-25 16:54:33 +0800 |
commit | 10ca2f12149b8c3fde9af51da89736529892dc69 (patch) | |
tree | 1a5118db41796df0a40c20467f37542e957e87a0 /net | |
parent | 3fb99edfabe05a47803eba7f39109b5eb86e25df (diff) | |
parent | cf3095062b75f6e518c6ef8a25b47a5b2ced7668 (diff) |
Merge remote branch 'fsl-linux-sdk/imx_3.0.35' into imx_3.0.35_android
Conflicts:
arch/arm/configs/imx6_defconfig
arch/arm/configs/imx6_updater_defconfig
arch/arm/configs/imx6s_defconfig
arch/arm/include/asm/dma-mapping.h
arch/arm/kernel/smp.c
arch/arm/mach-mx6/Kconfig
arch/arm/mach-mx6/board-mx6dl_arm2.h
arch/arm/mach-mx6/board-mx6dl_sabresd.h
arch/arm/mach-mx6/board-mx6q_arm2.c
arch/arm/mach-mx6/board-mx6q_arm2.h
arch/arm/mach-mx6/board-mx6q_sabreauto.c
arch/arm/mach-mx6/board-mx6q_sabreauto.h
arch/arm/mach-mx6/board-mx6q_sabrelite.c
arch/arm/mach-mx6/board-mx6q_sabresd.c
arch/arm/mach-mx6/board-mx6q_sabresd.h
arch/arm/mach-mx6/board-mx6sl_arm2.c
arch/arm/mach-mx6/board-mx6sl_arm2.h
arch/arm/mach-mx6/board-mx6solo_sabreauto.h
arch/arm/mach-mx6/bus_freq.c
arch/arm/mach-mx6/clock.c
arch/arm/mach-mx6/clock_mx6sl.c
arch/arm/mach-mx6/cpu.c
arch/arm/mach-mx6/crm_regs.h
arch/arm/mach-mx6/devices-imx6q.h
arch/arm/mach-mx6/devices.c
arch/arm/mach-mx6/mx6_anatop_regulator.c
arch/arm/mach-mx6/pcie.c
arch/arm/mach-mx6/system.c
arch/arm/mm/dma-mapping.c
arch/arm/plat-mxc/devices/Makefile
arch/arm/plat-mxc/devices/platform-imx-dcp.c
arch/arm/plat-mxc/devices/platform-imx-ocotp.c
arch/arm/plat-mxc/devices/platform-imx-rngb.c
arch/arm/plat-mxc/devices/platform-mxc_hdmi.c
arch/arm/plat-mxc/include/mach/devices-common.h
arch/arm/plat-mxc/include/mach/esdhc.h
arch/arm/plat-mxc/include/mach/iomux-mx6dl.h
arch/arm/plat-mxc/include/mach/iomux-mx6q.h
arch/arm/plat-mxc/include/mach/memory.h
arch/arm/plat-mxc/include/mach/mx6.h
arch/arm/plat-mxc/include/mach/mxc_edid.h
arch/arm/plat-mxc/include/mach/mxc_hdmi.h
arch/arm/plat-mxc/system.c
drivers/Kconfig
drivers/char/hw_random/fsl-rngc.c
drivers/cpufreq/Makefile
drivers/cpufreq/cpufreq_interactive.c
drivers/crypto/Kconfig
drivers/crypto/caam/caamalg.c
drivers/crypto/caam/compat.h
drivers/crypto/caam/ctrl.c
drivers/crypto/caam/desc_constr.h
drivers/crypto/caam/intern.h
drivers/crypto/dcp.c
drivers/dma/pch_dma.c
drivers/input/keyboard/gpio_keys.c
drivers/input/touchscreen/egalax_ts.c
drivers/input/touchscreen/max11801_ts.c
drivers/media/video/mxc/capture/Kconfig
drivers/media/video/mxc/capture/adv7180.c
drivers/media/video/mxc/capture/ipu_csi_enc.c
drivers/media/video/mxc/capture/ipu_prp_vf_sdc.c
drivers/media/video/mxc/capture/ipu_prp_vf_sdc_bg.c
drivers/media/video/mxc/capture/mxc_v4l2_capture.c
drivers/media/video/mxc/capture/ov5640_mipi.c
drivers/media/video/mxc/output/mxc_vout.c
drivers/misc/Kconfig
drivers/misc/Makefile
drivers/mmc/card/block.c
drivers/mmc/core/mmc.c
drivers/mmc/host/mmci.c
drivers/mmc/host/sdhci-esdhc-imx.c
drivers/mmc/host/sdhci.c
drivers/mmc/host/sdhci.h
drivers/mxc/Kconfig
drivers/mxc/Makefile
drivers/mxc/asrc/mxc_asrc.c
drivers/mxc/gpu-viv/arch/XAQ2/hal/kernel/gc_hal_kernel_context.c
drivers/mxc/gpu-viv/arch/XAQ2/hal/kernel/gc_hal_kernel_hardware.c
drivers/mxc/gpu-viv/hal/kernel/gc_hal_kernel.c
drivers/mxc/gpu-viv/hal/kernel/gc_hal_kernel.h
drivers/mxc/gpu-viv/hal/kernel/gc_hal_kernel_command.c
drivers/mxc/gpu-viv/hal/kernel/gc_hal_kernel_event.c
drivers/mxc/gpu-viv/hal/kernel/inc/gc_hal.h
drivers/mxc/gpu-viv/hal/kernel/inc/gc_hal_base.h
drivers/mxc/gpu-viv/hal/kernel/inc/gc_hal_options.h
drivers/mxc/gpu-viv/hal/os/linux/kernel/gc_hal_kernel_os.c
drivers/mxc/ipu3/ipu_device.c
drivers/mxc/vpu/mxc_vpu.c
drivers/net/fec.c
drivers/net/wireless/Makefile
drivers/power/sabresd_battery.c
drivers/regulator/core.c
drivers/tty/serial/imx.c
drivers/usb/core/hub.c
drivers/usb/gadget/arcotg_udc.c
drivers/usb/gadget/fsl_updater.c
drivers/usb/gadget/inode.c
drivers/usb/host/ehci-hub.c
drivers/video/mxc/ldb.c
drivers/video/mxc/mipi_dsi.c
drivers/video/mxc/mxc_dispdrv.c
drivers/video/mxc/mxc_dispdrv.h
drivers/video/mxc/mxc_edid.c
drivers/video/mxc/mxc_elcdif_fb.c
drivers/video/mxc/mxc_ipuv3_fb.c
drivers/video/mxc/mxc_spdc_fb.c
drivers/video/mxc_hdmi.c
drivers/watchdog/imx2_wdt.c
fs/proc/base.c
include/linux/mmc/host.h
include/linux/mmc/sdhci.h
include/linux/mxc_v4l2.h
kernel/power/main.c
sound/soc/codecs/mxc_hdmi.c
sound/soc/codecs/mxc_spdif.c
sound/soc/codecs/wm8962.c
sound/soc/imx/Kconfig
sound/soc/imx/Makefile
sound/soc/imx/imx-cs42888.c
sound/soc/imx/imx-esai.c
sound/soc/imx/imx-wm8958.c
sound/soc/imx/imx-wm8962.c
Diffstat (limited to 'net')
96 files changed, 994 insertions, 643 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 5b4f51d440f4..d54845618c2a 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -154,7 +154,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, skb = __vlan_hwaccel_put_tag(skb, vlan_tci); } - skb_set_dev(skb, vlan_dev_info(dev)->real_dev); + skb->dev = vlan_dev_info(dev)->real_dev; len = skb->len; ret = dev_queue_xmit(skb); diff --git a/net/atm/clip.c b/net/atm/clip.c index 1d4be60e1390..5889074e9718 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -364,33 +364,37 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct clip_priv *clip_priv = PRIV(dev); + struct dst_entry *dst = skb_dst(skb); struct atmarp_entry *entry; + struct neighbour *n; struct atm_vcc *vcc; int old; unsigned long flags; pr_debug("(skb %p)\n", skb); - if (!skb_dst(skb)) { + if (!dst) { pr_err("skb_dst(skb) == NULL\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; } - if (!skb_dst(skb)->neighbour) { + n = dst_get_neighbour(dst); + if (!n) { #if 0 - skb_dst(skb)->neighbour = clip_find_neighbour(skb_dst(skb), 1); - if (!skb_dst(skb)->neighbour) { + n = clip_find_neighbour(skb_dst(skb), 1); + if (!n) { dev_kfree_skb(skb); /* lost that one */ dev->stats.tx_dropped++; return 0; } + dst_set_neighbour(dst, n); #endif pr_err("NO NEIGHBOUR !\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; } - entry = NEIGH2ENTRY(skb_dst(skb)->neighbour); + entry = NEIGH2ENTRY(n); if (!entry->vccs) { if (time_after(jiffies, entry->expires)) { /* should be resolved */ @@ -407,7 +411,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, } pr_debug("neigh %p, vccs %p\n", entry, entry->vccs); ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc; - pr_debug("using neighbour %p, vcc %p\n", skb_dst(skb)->neighbour, vcc); + pr_debug("using neighbour %p, vcc %p\n", n, vcc); if (entry->vccs->encap) { void *here; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index e7c69f4619ec..b04a6ef4da94 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -2006,16 +2006,17 @@ static void __exit ax25_exit(void) proc_net_remove(&init_net, "ax25_route"); proc_net_remove(&init_net, "ax25"); proc_net_remove(&init_net, "ax25_calls"); - ax25_rt_free(); - ax25_uid_free(); - ax25_dev_free(); - ax25_unregister_sysctl(); unregister_netdevice_notifier(&ax25_dev_notifier); + ax25_unregister_sysctl(); dev_remove_pack(&ax25_packet_type); sock_unregister(PF_AX25); proto_unregister(&ax25_proto); + + ax25_rt_free(); + ax25_uid_free(); + ax25_dev_free(); } module_exit(ax25_exit); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3b3919864078..f38e633c7546 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -510,6 +510,11 @@ int hci_dev_open(__u16 dev) hci_req_lock(hdev); + if (test_bit(HCI_UNREGISTER, &hdev->flags)) { + ret = -ENODEV; + goto done; + } + if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) { ret = -ERFKILL; goto done; @@ -1563,6 +1568,8 @@ int hci_unregister_dev(struct hci_dev *hdev) BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); + set_bit(HCI_UNREGISTER, &hdev->flags); + write_lock_bh(&hci_dev_list_lock); list_del(&hdev->list); write_unlock_bh(&hci_dev_list_lock); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 995cbe0ac0b2..e78269d798c0 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -241,7 +241,6 @@ static void br_multicast_group_expired(unsigned long data) hlist_del_rcu(&mp->hlist[mdb->ver]); mdb->size--; - del_timer(&mp->query_timer); call_rcu_bh(&mp->rcu, br_multicast_free_group); out: @@ -271,7 +270,6 @@ static void br_multicast_del_pg(struct net_bridge *br, rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); - del_timer(&p->query_timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); if (!mp->ports && !mp->mglist && @@ -446,8 +444,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); - ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, - &ip6h->saddr); + if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, + &ip6h->saddr)) { + kfree_skb(skb); + return NULL; + } ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); @@ -504,74 +505,6 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, return NULL; } -static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp) -{ - struct net_bridge *br = mp->br; - struct sk_buff *skb; - - skb = br_multicast_alloc_query(br, &mp->addr); - if (!skb) - goto timer; - - netif_rx(skb); - -timer: - if (++mp->queries_sent < br->multicast_last_member_count) - mod_timer(&mp->query_timer, - jiffies + br->multicast_last_member_interval); -} - -static void br_multicast_group_query_expired(unsigned long data) -{ - struct net_bridge_mdb_entry *mp = (void *)data; - struct net_bridge *br = mp->br; - - spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || !mp->mglist || - mp->queries_sent >= br->multicast_last_member_count) - goto out; - - br_multicast_send_group_query(mp); - -out: - spin_unlock(&br->multicast_lock); -} - -static void br_multicast_send_port_group_query(struct net_bridge_port_group *pg) -{ - struct net_bridge_port *port = pg->port; - struct net_bridge *br = port->br; - struct sk_buff *skb; - - skb = br_multicast_alloc_query(br, &pg->addr); - if (!skb) - goto timer; - - br_deliver(port, skb); - -timer: - if (++pg->queries_sent < br->multicast_last_member_count) - mod_timer(&pg->query_timer, - jiffies + br->multicast_last_member_interval); -} - -static void br_multicast_port_group_query_expired(unsigned long data) -{ - struct net_bridge_port_group *pg = (void *)data; - struct net_bridge_port *port = pg->port; - struct net_bridge *br = port->br; - - spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) || - pg->queries_sent >= br->multicast_last_member_count) - goto out; - - br_multicast_send_port_group_query(pg); - -out: - spin_unlock(&br->multicast_lock); -} - static struct net_bridge_mdb_entry *br_multicast_get_group( struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group, int hash) @@ -687,8 +620,6 @@ rehash: mp->addr = *group; setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); - setup_timer(&mp->query_timer, br_multicast_group_query_expired, - (unsigned long)mp); hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); mdb->size++; @@ -743,8 +674,6 @@ static int br_multicast_add_group(struct net_bridge *br, hlist_add_head(&p->mglist, &port->mglist); setup_timer(&p->timer, br_multicast_port_group_expired, (unsigned long)p); - setup_timer(&p->query_timer, br_multicast_port_group_query_expired, - (unsigned long)p); rcu_assign_pointer(*pp, p); @@ -1288,9 +1217,6 @@ static void br_multicast_leave_group(struct net_bridge *br, time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { mod_timer(&mp->timer, time); - - mp->queries_sent = 0; - mod_timer(&mp->query_timer, now); } goto out; @@ -1307,9 +1233,6 @@ static void br_multicast_leave_group(struct net_bridge *br, time_after(p->timer.expires, time) : try_to_del_timer_sync(&p->timer) >= 0)) { mod_timer(&p->timer, time); - - p->queries_sent = 0; - mod_timer(&p->query_timer, now); } break; @@ -1675,7 +1598,6 @@ void br_multicast_stop(struct net_bridge *br) hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); - del_timer(&mp->query_timer); call_rcu_bh(&mp->rcu, br_multicast_free_group); } } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 56149ec36d7f..3dc7f5446a9d 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -343,24 +343,26 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct neighbour *neigh; struct dst_entry *dst; skb->dev = bridge_parent(skb->dev); if (!skb->dev) goto free_skb; dst = skb_dst(skb); + neigh = dst_get_neighbour(dst); if (dst->hh) { neigh_hh_bridge(dst->hh, skb); skb->dev = nf_bridge->physindev; return br_handle_frame_finish(skb); - } else if (dst->neighbour) { + } else if (neigh) { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and * protocol number. */ skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ nf_bridge->mask |= BRNF_BRIDGED_DNAT; - return dst->neighbour->output(skb); + return neigh->output(skb); } free_skb: kfree_skb(skb); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 857a021deea9..1ca1b1c7560e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -77,9 +77,7 @@ struct net_bridge_port_group { struct hlist_node mglist; struct rcu_head rcu; struct timer_list timer; - struct timer_list query_timer; struct br_ip addr; - u32 queries_sent; }; struct net_bridge_mdb_entry @@ -89,10 +87,8 @@ struct net_bridge_mdb_entry struct net_bridge_port_group __rcu *ports; struct rcu_head rcu; struct timer_list timer; - struct timer_list query_timer; struct br_ip addr; bool mglist; - u32 queries_sent; }; struct net_bridge_mdb_htable diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index dbdaa95b8005..5ba4366a220d 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -53,7 +53,6 @@ struct cfcnfg *get_cfcnfg(struct net *net) struct caif_net *caifn; BUG_ON(!net); caifn = net_generic(net, caif_net_id); - BUG_ON(!caifn); return caifn->cfg; } EXPORT_SYMBOL(get_cfcnfg); @@ -63,7 +62,6 @@ static struct caif_device_entry_list *caif_device_list(struct net *net) struct caif_net *caifn; BUG_ON(!net); caifn = net_generic(net, caif_net_id); - BUG_ON(!caifn); return &caifn->caifdevs; } @@ -92,7 +90,6 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev) struct caif_device_entry *caifd; caifdevs = caif_device_list(dev_net(dev)); - BUG_ON(!caifdevs); caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC); if (!caifd) @@ -108,7 +105,7 @@ static struct caif_device_entry *caif_get(struct net_device *dev) struct caif_device_entry_list *caifdevs = caif_device_list(dev_net(dev)); struct caif_device_entry *caifd; - BUG_ON(!caifdevs); + list_for_each_entry_rcu(caifd, &caifdevs->list, list) { if (caifd->netdev == dev) return caifd; @@ -349,7 +346,7 @@ static struct notifier_block caif_device_notifier = { static int caif_init_net(struct net *net) { struct caif_net *caifn = net_generic(net, caif_net_id); - BUG_ON(!caifn); + INIT_LIST_HEAD(&caifn->caifdevs.list); mutex_init(&caifn->caifdevs.lock); @@ -414,7 +411,7 @@ static int __init caif_device_init(void) { int result; - result = register_pernet_device(&caif_net_ops); + result = register_pernet_subsys(&caif_net_ops); if (result) return result; @@ -427,7 +424,7 @@ static int __init caif_device_init(void) static void __exit caif_device_exit(void) { - unregister_pernet_device(&caif_net_ops); + unregister_pernet_subsys(&caif_net_ops); unregister_netdevice_notifier(&caif_device_notifier); dev_remove_pack(&caif_packet_type); } diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 52fe33bee029..bca32d7c15c9 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -313,7 +313,6 @@ int caif_connect_client(struct net *net, struct caif_connect_request *conn_req, int err; struct cfctrl_link_param param; struct cfcnfg *cfg = get_cfcnfg(net); - caif_assert(cfg != NULL); rcu_read_lock(); err = caif_connect_req_to_link_param(cfg, conn_req, ¶m); diff --git a/net/core/dev.c b/net/core/dev.c index f14f6015a7ab..a71eafc392e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1406,14 +1406,34 @@ EXPORT_SYMBOL(register_netdevice_notifier); * register_netdevice_notifier(). The notifier is unlinked into the * kernel structures and may then be reused. A negative errno code * is returned on a failure. + * + * After unregistering unregister and down device events are synthesized + * for all devices on the device list to the removed notifier to remove + * the need for special case cleanup code. */ int unregister_netdevice_notifier(struct notifier_block *nb) { + struct net_device *dev; + struct net *net; int err; rtnl_lock(); err = raw_notifier_chain_unregister(&netdev_chain, nb); + if (err) + goto unlock; + + for_each_net(net) { + for_each_netdev(net, dev) { + if (dev->flags & IFF_UP) { + nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); + nb->notifier_call(nb, NETDEV_DOWN, dev); + } + nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); + } + } +unlock: rtnl_unlock(); return err; } @@ -1513,10 +1533,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) kfree_skb(skb); return NET_RX_DROP; } - skb_set_dev(skb, dev); + skb->dev = dev; + skb_dst_drop(skb); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, dev); + skb->mark = 0; + secpath_reset(skb); + nf_reset(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -1771,36 +1795,6 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); -/** - * skb_dev_set -- assign a new device to a buffer - * @skb: buffer for the new device - * @dev: network device - * - * If an skb is owned by a device already, we have to reset - * all data private to the namespace a device belongs to - * before assigning it a new device. - */ -#ifdef CONFIG_NET_NS -void skb_set_dev(struct sk_buff *skb, struct net_device *dev) -{ - skb_dst_drop(skb); - if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) { - secpath_reset(skb); - nf_reset(skb); - skb_init_secmark(skb); - skb->mark = 0; - skb->priority = 0; - skb->nf_trace = 0; - skb->ipvs_property = 0; -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#endif - } - skb->dev = dev; -} -EXPORT_SYMBOL(skb_set_dev); -#endif /* CONFIG_NET_NS */ - /* * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. @@ -3434,14 +3428,20 @@ static inline gro_result_t __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; + unsigned int maclen = skb->dev->hard_header_len; for (p = napi->gro_list; p; p = p->next) { unsigned long diffs; diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; - diffs |= compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); + if (maclen == ETH_HLEN) + diffs |= compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); + else if (!diffs) + diffs = memcmp(skb_mac_header(p), + skb_gro_mac_header(skb), + maclen); NAPI_GRO_CB(p)->same_flow = !diffs; NAPI_GRO_CB(p)->flush = 0; } @@ -3498,7 +3498,8 @@ EXPORT_SYMBOL(napi_gro_receive); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { __skb_pull(skb, skb_headlen(skb)); - skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); + /* restore the reserve we had after netdev_alloc_skb_ip_align() */ + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; diff --git a/net/core/dst.c b/net/core/dst.c index 6135f3671692..8246d47a2184 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - dst->neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); dst->hh = NULL; #ifdef CONFIG_XFRM dst->xfrm = NULL; @@ -231,7 +231,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) smp_rmb(); again: - neigh = dst->neighbour; + neigh = rcu_dereference_protected(dst->_neighbour, 1); hh = dst->hh; child = dst->child; @@ -240,7 +240,7 @@ again: hh_cache_put(hh); if (neigh) { - dst->neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); neigh_release(neigh); } @@ -367,14 +367,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { + struct neighbour *neigh; + dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = dst->dev; + rcu_read_lock(); + neigh = dst_get_neighbour(dst); + if (neigh && neigh->dev == dev) { + neigh->dev = dst->dev; dev_hold(dst->dev); dev_put(dev); } + rcu_read_unlock(); } } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 16db88707804..96bb0a33f861 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -823,6 +823,8 @@ next_elt: write_unlock_bh(&tbl->lock); cond_resched(); write_lock_bh(&tbl->lock); + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); } /* Cycle through all hash buckets every base_reachable_time/2 ticks. * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 @@ -1173,12 +1175,17 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, while (neigh->nud_state & NUD_VALID && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { - struct neighbour *n1 = neigh; + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n2, *n1 = neigh; write_unlock_bh(&neigh->lock); + + rcu_read_lock(); /* On shaper/eql skb->dst->neighbour != neigh :( */ - if (skb_dst(skb) && skb_dst(skb)->neighbour) - n1 = skb_dst(skb)->neighbour; + if (dst && (n2 = dst_get_neighbour(dst)) != NULL) + n1 = n2; n1->output(skb); + rcu_read_unlock(); + write_lock_bh(&neigh->lock); } skb_queue_purge(&neigh->arp_queue); @@ -1300,10 +1307,10 @@ EXPORT_SYMBOL(neigh_compat_output); int neigh_resolve_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh; + struct neighbour *neigh = dst_get_neighbour(dst); int rc = 0; - if (!dst || !(neigh = dst->neighbour)) + if (!dst) goto discard; __skb_pull(skb, skb_network_offset(skb)); @@ -1333,7 +1340,7 @@ out: return rc; discard: NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", - dst, dst ? dst->neighbour : NULL); + dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); @@ -1347,7 +1354,7 @@ int neigh_connected_output(struct sk_buff *skb) { int err; struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; unsigned int seq; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ea489db1bc23..2772ed11bec9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -29,6 +29,20 @@ EXPORT_SYMBOL(init_net); #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ +static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; + +static struct net_generic *net_alloc_generic(void) +{ + struct net_generic *ng; + size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); + + ng = kzalloc(generic_size, GFP_KERNEL); + if (ng) + ng->len = max_gen_ptrs; + + return ng; +} + static int net_assign_generic(struct net *net, int id, void *data) { struct net_generic *ng, *old_ng; @@ -42,8 +56,7 @@ static int net_assign_generic(struct net *net, int id, void *data) if (old_ng->len >= id) goto assign; - ng = kzalloc(sizeof(struct net_generic) + - id * sizeof(void *), GFP_KERNEL); + ng = net_alloc_generic(); if (ng == NULL) return -ENOMEM; @@ -58,7 +71,6 @@ static int net_assign_generic(struct net *net, int id, void *data) * the old copy for kfree after a grace period. */ - ng->len = id; memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); rcu_assign_pointer(net->gen, ng); @@ -70,21 +82,29 @@ assign: static int ops_init(const struct pernet_operations *ops, struct net *net) { - int err; + int err = -ENOMEM; + void *data = NULL; + if (ops->id && ops->size) { - void *data = kzalloc(ops->size, GFP_KERNEL); + data = kzalloc(ops->size, GFP_KERNEL); if (!data) - return -ENOMEM; + goto out; err = net_assign_generic(net, *ops->id, data); - if (err) { - kfree(data); - return err; - } + if (err) + goto cleanup; } + err = 0; if (ops->init) - return ops->init(net); - return 0; + err = ops->init(net); + if (!err) + return 0; + +cleanup: + kfree(data); + +out: + return err; } static void ops_free(const struct pernet_operations *ops, struct net *net) @@ -159,18 +179,6 @@ out_undo: goto out; } -static struct net_generic *net_alloc_generic(void) -{ - struct net_generic *ng; - size_t generic_size = sizeof(struct net_generic) + - INITIAL_NET_GEN_PTRS * sizeof(void *); - - ng = kzalloc(generic_size, GFP_KERNEL); - if (ng) - ng->len = INITIAL_NET_GEN_PTRS; - - return ng; -} #ifdef CONFIG_NET_NS static struct kmem_cache *net_cachep; @@ -446,12 +454,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { - int err = 0; - err = ops_init(ops, &init_net); - if (err) - ops_free(ops, &init_net); - return err; - + return ops_init(ops, &init_net); } static void __unregister_pernet_operations(struct pernet_operations *ops) @@ -481,6 +484,7 @@ again: } return error; } + max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id); } error = __register_pernet_operations(list, ops); if (error) { diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 18d9cbda3a39..05db410fd135 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -193,7 +193,7 @@ void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); - if (dev->priv_flags & IFF_SLAVE) { + if (dev->flags & IFF_SLAVE) { if (dev->npinfo) { struct net_device *bond_dev = dev->master; struct sk_buff *skb; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e35a6fbb8110..c0e0f7679e75 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1932,7 +1932,7 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), &init_net) || pktgen_exiting) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, @@ -3755,12 +3755,18 @@ static void __exit pg_cleanup(void) { struct pktgen_thread *t; struct list_head *q, *n; + LIST_HEAD(list); /* Stop all interfaces & threads */ pktgen_exiting = true; - list_for_each_safe(q, n, &pktgen_threads) { + mutex_lock(&pktgen_thread_lock); + list_splice_init(&pktgen_threads, &list); + mutex_unlock(&pktgen_thread_lock); + + list_for_each_safe(q, n, &list) { t = list_entry(q, struct pktgen_thread, th_list); + list_del(&t->th_list); kthread_stop(t->tsk); kfree(t); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 46cbd28f40f9..4821df84eba3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2985,6 +2985,8 @@ static void sock_rmem_free(struct sk_buff *skb) */ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { + int len = skb->len; + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) return -ENOMEM; @@ -2999,7 +3001,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk, len); return 0; } EXPORT_SYMBOL(sock_queue_err_skb); diff --git a/net/core/sock.c b/net/core/sock.c index aebb419519b3..b4bb59a9245b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1501,6 +1501,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, gfp_t gfp_mask; long timeo; int err; + int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + + err = -EMSGSIZE; + if (npages > MAX_SKB_FRAGS) + goto failure; gfp_mask = sk->sk_allocation; if (gfp_mask & __GFP_WAIT) @@ -1519,14 +1524,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { skb = alloc_skb(header_len, gfp_mask); if (skb) { - int npages; int i; /* No pages, we're done... */ if (!data_len) break; - npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; skb->truesize += data_len; skb_shinfo(skb)->nr_frags = npages; for (i = 0; i < npages; i++) { diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 602dade7e9a3..9810610d26c6 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -208,7 +208,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; char mac_addr[ETH_ALEN]; @@ -227,7 +227,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) static int dn_long_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3; unsigned char *data; @@ -274,7 +274,7 @@ static int dn_long_output(struct sk_buff *skb) static int dn_short_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; struct dn_short_packet *sp; @@ -318,7 +318,7 @@ static int dn_short_output(struct sk_buff *skb) static int dn_phase3_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; struct dn_short_packet *sp; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 74544bc6fdec..b91b60363c39 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -241,9 +241,11 @@ static int dn_dst_gc(struct dst_ops *ops) */ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) { + struct neighbour *n = dst_get_neighbour(dst); u32 min_mtu = 230; - struct dn_dev *dn = dst->neighbour ? - rcu_dereference_raw(dst->neighbour->dev->dn_ptr) : NULL; + struct dn_dev *dn; + + dn = n ? rcu_dereference_raw(n->dev->dn_ptr) : NULL; if (dn && dn->use_long == 0) min_mtu -= 6; @@ -715,7 +717,7 @@ static int dn_output(struct sk_buff *skb) int err = -EINVAL; - if ((neigh = dst->neighbour) == NULL) + if ((neigh = dst_get_neighbour(dst)) == NULL) goto error; skb->dev = dev; @@ -750,7 +752,7 @@ static int dn_forward(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); struct dn_route *rt; - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); int header_len; #ifdef CONFIG_NETFILTER struct net_device *dev = skb->dev; @@ -833,11 +835,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) } rt->rt_type = res->type; - if (dev != NULL && rt->dst.neighbour == NULL) { + if (dev != NULL && dst_get_neighbour(&rt->dst) == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); - rt->dst.neighbour = n; + dst_set_neighbour(&rt->dst, n); } if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) @@ -1144,7 +1146,7 @@ make_route: rt->rt_dst_map = fld.daddr; rt->rt_src_map = fld.saddr; - rt->dst.neighbour = neigh; + dst_set_neighbour(&rt->dst, neigh); neigh = NULL; rt->dst.lastuse = jiffies; @@ -1416,7 +1418,7 @@ make_route: rt->fld.flowidn_iif = in_dev->ifindex; rt->fld.flowidn_mark = fld.flowidn_mark; - rt->dst.neighbour = neigh; + dst_set_neighbour(&rt->dst, neigh); rt->dst.lastuse = jiffies; rt->dst.output = dn_rt_bug; switch(res.type) { diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index c1f4154552fc..36d14406261e 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -136,8 +136,6 @@ static void ah_output_done(struct crypto_async_request *base, int err) memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); } - err = ah->nexthdr; - kfree(AH_SKB_CB(skb)->tmp); xfrm_output_resume(skb, err); } @@ -264,12 +262,12 @@ static void ah_input_done(struct crypto_async_request *base, int err) if (err) goto out; + err = ah->nexthdr; + skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); skb_set_transport_header(skb, -ihl); - - err = ah->nexthdr; out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); @@ -371,8 +369,6 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) if (err == -EINPROGRESS) goto out; - if (err == -EBUSY) - err = NET_XMIT_DROP; goto out_free; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1b74d3b64371..d8f852dbf660 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -518,26 +518,32 @@ EXPORT_SYMBOL(arp_find); /* END OF OBSOLETE FUNCTIONS */ +struct neighbour *__arp_bind_neighbour(struct dst_entry *dst, __be32 nexthop) +{ + struct net_device *dev = dst->dev; + + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + nexthop = 0; + return __neigh_lookup_errno( +#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) + dev->type == ARPHRD_ATM ? + clip_tbl_hook : +#endif + &arp_tbl, &nexthop, dev); +} + int arp_bind_neighbour(struct dst_entry *dst) { struct net_device *dev = dst->dev; - struct neighbour *n = dst->neighbour; + struct neighbour *n = dst_get_neighbour(dst); if (dev == NULL) return -EINVAL; if (n == NULL) { - __be32 nexthop = ((struct rtable *)dst)->rt_gateway; - if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) - nexthop = 0; - n = __neigh_lookup_errno( -#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) - dev->type == ARPHRD_ATM ? - clip_tbl_hook : -#endif - &arp_tbl, &nexthop, dev); + n = __arp_bind_neighbour(dst, ((struct rtable *)dst)->rt_gateway); if (IS_ERR(n)) return PTR_ERR(n); - dst->neighbour = n; + dst_set_neighbour(dst, n); } return 0; } @@ -900,7 +906,8 @@ static int arp_process(struct sk_buff *skb) if (addr_type == RTN_UNICAST && (arp_fwd_proxy(in_dev, dev, rt) || arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || - pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { + (rt->dst.dev != dev && + pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 66439a7c6d31..c48323ad268b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1496,7 +1496,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + int old_value = *(int *)ctl->data; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + int new_value = *(int *)ctl->data; if (write) { struct ipv4_devconf *cnf = ctl->extra1; @@ -1507,6 +1509,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (cnf == net->ipv4.devconf_dflt) devinet_copy_dflt_conf(net, i); + if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1) + if ((new_value == 0) && (old_value != 0)) + rt_cache_flush(net, 0); } return ret; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index a5b413416da3..530787bc1990 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -457,28 +457,22 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); u32 align = max_t(u32, blksize, esp->padlen); - u32 rem; - - mtu -= x->props.header_len + crypto_aead_authsize(esp->aead); - rem = mtu & (align - 1); - mtu &= ~(align - 1); + unsigned int net_adj; switch (x->props.mode) { - case XFRM_MODE_TUNNEL: - break; - default: case XFRM_MODE_TRANSPORT: - /* The worst case */ - mtu -= blksize - 4; - mtu += min_t(u32, blksize - 4, rem); - break; case XFRM_MODE_BEET: - /* The worst case. */ - mtu += min_t(u32, IPV4_BEET_PHMAXLEN, rem); + net_adj = sizeof(struct iphdr); break; + case XFRM_MODE_TUNNEL: + net_adj = 0; + break; + default: + BUG(); } - return mtu - 2; + return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - + net_adj) & ~(align - 1)) + (net_adj - 2); } static void esp4_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 33e2c35b74b7..7e454ba8e850 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -142,6 +142,18 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }; /* Release a nexthop info record */ +static void free_fib_info_rcu(struct rcu_head *head) +{ + struct fib_info *fi = container_of(head, struct fib_info, rcu); + + change_nexthops(fi) { + if (nexthop_nh->nh_dev) + dev_put(nexthop_nh->nh_dev); + } endfor_nexthops(fi); + + release_net(fi->fib_net); + kfree(fi); +} void free_fib_info(struct fib_info *fi) { @@ -149,14 +161,8 @@ void free_fib_info(struct fib_info *fi) pr_warning("Freeing alive fib_info %p\n", fi); return; } - change_nexthops(fi) { - if (nexthop_nh->nh_dev) - dev_put(nexthop_nh->nh_dev); - nexthop_nh->nh_dev = NULL; - } endfor_nexthops(fi); fib_info_cnt--; - release_net(fi->fib_net); - kfree_rcu(fi, rcu); + call_rcu(&fi->rcu, free_fib_info_rcu); } void fib_release_info(struct fib_info *fi) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 58c25ea5a5c1..0d884eb2b14f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1371,6 +1371,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) continue; + if (fi->fib_dead) + continue; if (fa->fa_info->fib_scope < flp->flowi4_scope) continue; fib_alias_accessed(fa); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d577199eabd5..e0d42dbb33fe 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -875,6 +875,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * to be intended in a v3 query. */ max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); + if (!max_delay) + max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) return; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 3b34d1c86270..29a07b6c7168 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb) rt = skb_rtable(skb); - if (opt->is_strictroute && ip_hdr(skb)->daddr != rt->rt_gateway) + if (opt->is_strictroute && opt->nexthop != rt->rt_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8871067560db..d7bb94c48345 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -731,9 +731,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { + struct neighbour *neigh = dst_get_neighbour(skb_dst(skb)); const struct in6_addr *addr6; int addr_type; - struct neighbour *neigh = skb_dst(skb)->neighbour; if (neigh == NULL) goto tx_error; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ec93335901dd..42dd1a90edea 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -568,11 +568,12 @@ void ip_forward_options(struct sk_buff *skb) ) { if (srrptr + 3 > srrspace) break; - if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0) + if (memcmp(&opt->nexthop, &optptr[srrptr-1], 4) == 0) break; } if (srrptr + 3 <= srrspace) { opt->is_changed = 1; + ip_hdr(skb)->daddr = opt->nexthop; ip_rt_get_source(&optptr[srrptr-1], skb, rt); optptr[2] = srrptr+4; } else if (net_ratelimit()) @@ -640,6 +641,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) } if (srrptr <= srrspace) { opt->srr_is_hit = 1; + opt->nexthop = nexthop; opt->is_changed = 1; } return 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0c99db4c80b1..51a3eec2c706 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -182,6 +182,8 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct rtable *rt = (struct rtable *)dst; struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); + struct neighbour *neigh; + int res; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); @@ -203,10 +205,22 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); + rcu_read_lock(); + if (dst->hh) { + int res = neigh_hh_output(dst->hh, skb); + + rcu_read_unlock(); + return res; + } else { + neigh = dst_get_neighbour(dst); + if (neigh) { + res = neigh->output(skb); + + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); + } if (net_ratelimit()) printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ab7e5542c1cf..7fbcabafa29b 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -252,6 +252,10 @@ static int __init ic_open_devs(void) } } + /* no point in waiting if we could not bring up at least one device */ + if (!ic_first_dev) + goto have_carrier; + /* wait for a carrier on at least one device */ start = jiffies; while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 378b20b7ca6e..6f06f7f39ea2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -285,6 +285,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, if (register_netdevice(dev) < 0) goto failed_free; + strcpy(nt->parms.name, dev->name); + dev_hold(dev); ipip_tunnel_link(ipn, nt); return nt; @@ -759,7 +761,6 @@ static int ipip_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -825,6 +826,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) static int __net_init ipip_init_net(struct net *net) { struct ipip_net *ipn = net_generic(net, ipip_net_id); + struct ip_tunnel *t; int err; ipn->tunnels[0] = ipn->tunnels_wc; @@ -848,6 +850,9 @@ static int __net_init ipip_init_net(struct net *net) if ((err = register_netdev(ipn->fb_tunnel_dev))) goto err_reg_dev; + t = netdev_priv(ipn->fb_tunnel_dev); + + strcpy(t->parms.name, ipn->fb_tunnel_dev->name); return 0; err_reg_dev: diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 75ef66f31832..6b95f74a91d3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -91,6 +91,7 @@ #include <linux/rcupdate.h> #include <linux/times.h> #include <linux/slab.h> +#include <linux/prefetch.h> #include <net/dst.h> #include <net/net_namespace.h> #include <net/protocol.h> @@ -132,6 +133,9 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int rt_chain_length_max __read_mostly = 20; +static struct delayed_work expires_work; +static unsigned long expires_ljiffies; + /* * Interface to generic destination cache. */ @@ -412,7 +416,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) "HHUptod\tSpecDst"); else { struct rtable *r = v; - int len; + struct neighbour *n; + int len, HHUptod; + + rcu_read_lock(); + n = dst_get_neighbour(&r->dst); + HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; + rcu_read_unlock(); seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", @@ -427,8 +437,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) dst_metric(&r->dst, RTAX_RTTVAR)), r->rt_key_tos, r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, - r->dst.hh ? (r->dst.hh->hh_output == - dev_queue_xmit) : 0, + HHUptod, r->rt_spec_dst, &len); seq_printf(seq, "%*s\n", 127 - len, ""); @@ -821,6 +830,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) return ONE; } +static void rt_check_expire(void) +{ + static unsigned int rover; + unsigned int i = rover, goal; + struct rtable *rth; + struct rtable __rcu **rthp; + unsigned long samples = 0; + unsigned long sum = 0, sum2 = 0; + unsigned long delta; + u64 mult; + + delta = jiffies - expires_ljiffies; + expires_ljiffies = jiffies; + mult = ((u64)delta) << rt_hash_log; + if (ip_rt_gc_timeout > 1) + do_div(mult, ip_rt_gc_timeout); + goal = (unsigned int)mult; + if (goal > rt_hash_mask) + goal = rt_hash_mask + 1; + for (; goal > 0; goal--) { + unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; + + i = (i + 1) & rt_hash_mask; + rthp = &rt_hash_table[i].chain; + + if (need_resched()) + cond_resched(); + + samples++; + + if (rcu_dereference_raw(*rthp) == NULL) + continue; + length = 0; + spin_lock_bh(rt_hash_lock_addr(i)); + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { + prefetch(rth->dst.rt_next); + if (rt_is_expired(rth)) { + *rthp = rth->dst.rt_next; + rt_free(rth); + continue; + } + if (rth->dst.expires) { + /* Entry is expired even if it is in use */ + if (time_before_eq(jiffies, rth->dst.expires)) { +nofree: + tmo >>= 1; + rthp = &rth->dst.rt_next; + /* + * We only count entries on + * a chain with equal hash inputs once + * so that entries for different QOS + * levels, and other non-hash input + * attributes don't unfairly skew + * the length computation + */ + length += has_noalias(rt_hash_table[i].chain, rth); + continue; + } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; + + /* Cleanup aged off entries. */ + *rthp = rth->dst.rt_next; + rt_free(rth); + } + spin_unlock_bh(rt_hash_lock_addr(i)); + sum += length; + sum2 += length*length; + } + if (samples) { + unsigned long avg = sum / samples; + unsigned long sd = int_sqrt(sum2 / samples - avg*avg); + rt_chain_length_max = max_t(unsigned long, + ip_rt_gc_elasticity, + (avg + 4*sd) >> FRACT_BITS); + } + rover = i; +} + +/* + * rt_worker_func() is run in process context. + * we call rt_check_expire() to scan part of the hash table + */ +static void rt_worker_func(struct work_struct *work) +{ + rt_check_expire(); + schedule_delayed_work(&expires_work, ip_rt_gc_interval); +} + /* * Perturbation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -1269,11 +1369,41 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } +static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) +{ + struct rtable *rt = (struct rtable *) dst; + __be32 orig_gw = rt->rt_gateway; + struct neighbour *n, *old_n; + + dst_confirm(&rt->dst); + + rt->rt_gateway = peer->redirect_learned.a4; + n = __arp_bind_neighbour(&rt->dst, rt->rt_gateway); + if (IS_ERR(n)) + return PTR_ERR(n); + old_n = xchg(&rt->dst._neighbour, n); + if (old_n) + neigh_release(old_n); + if (!n || !(n->nud_state & NUD_VALID)) { + if (n) + neigh_event_send(n, NULL); + rt->rt_gateway = orig_gw; + return -EAGAIN; + } else { + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + } + return 0; +} + /* called in rcu_read_lock() section */ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { + int s, i; struct in_device *in_dev = __in_dev_get_rcu(dev); + __be32 skeys[2] = { saddr, 0 }; + int ikeys[2] = { dev->ifindex, 0 }; struct inet_peer *peer; struct net *net; @@ -1296,13 +1426,43 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, goto reject_redirect; } - peer = inet_getpeer_v4(daddr, 1); - if (peer) { - peer->redirect_learned.a4 = new_gw; + for (s = 0; s < 2; s++) { + for (i = 0; i < 2; i++) { + unsigned int hash; + struct rtable __rcu **rthp; + struct rtable *rt; - inet_putpeer(peer); + hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net)); - atomic_inc(&__rt_peer_genid); + rthp = &rt_hash_table[hash].chain; + + while ((rt = rcu_dereference(*rthp)) != NULL) { + rthp = &rt->dst.rt_next; + + if (rt->rt_key_dst != daddr || + rt->rt_key_src != skeys[s] || + rt->rt_oif != ikeys[i] || + rt_is_input_route(rt) || + rt_is_expired(rt) || + !net_eq(dev_net(rt->dst.dev), net) || + rt->dst.error || + rt->dst.dev != dev || + rt->rt_gateway != old_gw) + continue; + + if (!rt->peer) + rt_bind_peer(rt, rt->rt_dst, 1); + + peer = rt->peer; + if (peer) { + if (peer->redirect_learned.a4 != new_gw) { + peer->redirect_learned.a4 = new_gw; + atomic_inc(&__rt_peer_genid); + } + check_peer_redir(&rt->dst, peer); + } + } + } } return; @@ -1589,31 +1749,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } -static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) -{ - struct rtable *rt = (struct rtable *) dst; - __be32 orig_gw = rt->rt_gateway; - - dst_confirm(&rt->dst); - - neigh_release(rt->dst.neighbour); - rt->dst.neighbour = NULL; - - rt->rt_gateway = peer->redirect_learned.a4; - if (arp_bind_neighbour(&rt->dst) || - !(rt->dst.neighbour->nud_state & NUD_VALID)) { - if (rt->dst.neighbour) - neigh_event_send(rt->dst.neighbour, NULL); - rt->rt_gateway = orig_gw; - return -EAGAIN; - } else { - rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, - rt->dst.neighbour); - } - return 0; -} - static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = (struct rtable *) dst; @@ -3088,6 +3223,13 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec_jiffies, }, { + .procname = "gc_interval", + .data = &ip_rt_gc_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { .procname = "redirect_load", .data = &ip_rt_redirect_load, .maxlen = sizeof(int), @@ -3297,6 +3439,11 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); + INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); + expires_ljiffies = jiffies; + schedule_delayed_work(&expires_work, + net_random() % ip_rt_gc_interval + ip_rt_gc_interval); + if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 438262977b0f..895f2157e136 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -277,6 +277,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct rtable *rt; __u8 rcv_wscale; bool ecn_ok = false; + struct flowi4 fl4; if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; @@ -344,20 +345,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - { - struct flowi4 fl4; - - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), - RT_SCOPE_UNIVERSE, IPPROTO_TCP, - inet_sk_flowi_flags(sk), - (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, - ireq->loc_addr, th->source, th->dest); - security_req_classify_flow(req, flowi4_to_flowi(&fl4)); - rt = ip_route_output_key(sock_net(sk), &fl4); - if (IS_ERR(rt)) { - reqsk_free(req); - goto out; - } + flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), + RT_SCOPE_UNIVERSE, IPPROTO_TCP, + inet_sk_flowi_flags(sk), + (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, + ireq->loc_addr, th->source, th->dest); + security_req_classify_flow(req, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(sock_net(sk), &fl4); + if (IS_ERR(rt)) { + reqsk_free(req); + goto out; } /* Try to redo what tcp_v4_send_synack did. */ @@ -371,5 +368,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rcv_wscale = rcv_wscale; ret = get_cookie_sock(sk, skb, req, &rt->dst); + /* ip_queue_xmit() depends on our flow being setup + * Normal sockets get it right from inet_csk_route_child_sock() + */ + if (ret) + inet_sk(ret)->cork.fl.u.ip4 = fl4; out: return ret; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 09ced58e6a51..58a944f4f791 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -854,8 +854,7 @@ new_segment: wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -864,7 +863,7 @@ wait_for_memory: } out: - if (copied) + if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) tcp_push(sk, flags, mss_now, tp->nonagle); return copied; @@ -3236,7 +3235,7 @@ void __init tcp_init(void) { struct sk_buff *skb = NULL; unsigned long limit; - int i, max_share, cnt; + int i, max_rshare, max_wshare, cnt; unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3300,15 +3299,16 @@ void __init tcp_init(void) /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); - max_share = min(4UL*1024*1024, limit); + max_wshare = min(4UL*1024*1024, limit); + max_rshare = min(6UL*1024*1024, limit); sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; sysctl_tcp_wmem[1] = 16*1024; - sysctl_tcp_wmem[2] = max(64*1024, max_share); + sysctl_tcp_wmem[2] = max(64*1024, max_wshare); sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; sysctl_tcp_rmem[1] = 87380; - sysctl_tcp_rmem[2] = max(87380, max_share); + sysctl_tcp_rmem[2] = max(87380, max_rshare); printk(KERN_INFO "TCP: Hash tables configured " "(established %u bind %u)\n", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c68040fe9cd9..7410a8c28e14 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -83,7 +83,7 @@ int sysctl_tcp_ecn __read_mostly = 2; EXPORT_SYMBOL(sysctl_tcp_ecn); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; -int sysctl_tcp_adv_win_scale __read_mostly = 2; +int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); int sysctl_tcp_stdurg __read_mostly; @@ -328,6 +328,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) incr = __tcp_grow_window(sk, skb); if (incr) { + incr = max_t(int, incr, 2 * skb->len); tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); inet_csk(sk)->icsk_ack.quick |= 1; @@ -460,8 +461,11 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) if (!win_dep) { m -= (new_sample >> 3); new_sample += m; - } else if (m < new_sample) - new_sample = m << 3; + } else { + m <<= 3; + if (m < new_sample) + new_sample = m; + } } else { /* No previous measure. */ new_sample = m << 3; @@ -1289,25 +1293,26 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, return in_sack; } -static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, - struct tcp_sacktag_state *state, +/* Mark the given newly-SACKed range as such, adjusting counters and hints. */ +static u8 tcp_sacktag_one(struct sock *sk, + struct tcp_sacktag_state *state, u8 sacked, + u32 start_seq, u32 end_seq, int dup_sack, int pcount) { struct tcp_sock *tp = tcp_sk(sk); - u8 sacked = TCP_SKB_CB(skb)->sacked; int fack_count = state->fack_count; /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { if (tp->undo_marker && tp->undo_retrans && - after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) + after(end_seq, tp->undo_marker)) tp->undo_retrans--; if (sacked & TCPCB_SACKED_ACKED) state->reord = min(fack_count, state->reord); } /* Nothing to do; acked frame is about to be dropped (was ACKed). */ - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) + if (!after(end_seq, tp->snd_una)) return sacked; if (!(sacked & TCPCB_SACKED_ACKED)) { @@ -1326,13 +1331,13 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, /* New sack for not retransmitted frame, * which was in hole. It is reordering. */ - if (before(TCP_SKB_CB(skb)->seq, + if (before(start_seq, tcp_highest_sack_seq(tp))) state->reord = min(fack_count, state->reord); /* SACK enhanced F-RTO (RFC4138; Appendix B) */ - if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) + if (!after(end_seq, tp->frto_highmark)) state->flag |= FLAG_ONLY_ORIG_SACKED; } @@ -1350,8 +1355,7 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->lost_skb_hint)->seq)) + before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) tp->lost_cnt_hint += pcount; if (fack_count > tp->fackets_out) @@ -1370,6 +1374,9 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, return sacked; } +/* Shift newly-SACKed bytes from this skb to the immediately previous + * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. + */ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, struct tcp_sacktag_state *state, unsigned int pcount, int shifted, int mss, @@ -1377,9 +1384,20 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *prev = tcp_write_queue_prev(sk, skb); + u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */ + u32 end_seq = start_seq + shifted; /* end of newly-SACKed */ BUG_ON(!pcount); + /* Adjust counters and hints for the newly sacked sequence + * range but discard the return value since prev is already + * marked. We must tag the range first because the seq + * advancement below implicitly advances + * tcp_highest_sack_seq() when skb is highest_sack. + */ + tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, + start_seq, end_seq, dup_sack, pcount); + if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; @@ -1406,9 +1424,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, skb_shinfo(skb)->gso_type = 0; } - /* We discard results */ - tcp_sacktag_one(skb, sk, state, dup_sack, pcount); - /* Difference in this won't matter, both ACKed by the same cumul. ACK */ TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); @@ -1556,6 +1571,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, } } + /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */ + if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) + goto fallback; + if (!skb_shift(prev, skb, len)) goto fallback; if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) @@ -1646,10 +1665,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, break; if (in_sack) { - TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, - state, - dup_sack, - tcp_skb_pcount(skb)); + TCP_SKB_CB(skb)->sacked = + tcp_sacktag_one(sk, + state, + TCP_SKB_CB(skb)->sacked, + TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq, + dup_sack, + tcp_skb_pcount(skb)); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) @@ -2534,6 +2557,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) if (cnt > packets) { if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || + (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || (oldcnt >= packets)) break; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 69790aa31981..53a5af66c0bb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -630,7 +630,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); #ifdef CONFIG_TCP_MD5SIG - key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; + key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL; if (key) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -650,6 +650,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; + /* When socket is gone, all binding information is lost. + * routing might fail in this case. using iif for oif to + * make sure we can deliver it + */ + arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb); net = dev_net(skb_dst(skb)->dev); ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, @@ -1449,9 +1454,13 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = newtp->write_seq ^ jiffies; - if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) - goto put_and_exit; - + if (!dst) { + dst = inet_csk_route_child_sock(sk, newsk, req); + if (!dst) + goto put_and_exit; + } else { + /* syncookie case : see end of cookie_v4_check() */ + } sk_setup_caps(newsk, dst); tcp_mtup_init(newsk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 882e0b0964d0..faf257b94154 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1134,11 +1134,9 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) sk_mem_uncharge(sk, len); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - /* Any change of skb->len requires recalculation of tso - * factor and mss. - */ + /* Any change of skb->len requires recalculation of tso factor. */ if (tcp_skb_pcount(skb) > 1) - tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk)); + tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb)); return 0; } diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 63418185f524..e3db3f915114 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, sizeof(*iph)); skb_reset_network_header(skb); - - memmove(skb->data - skb->mac_len, skb_mac_header(skb), - skb->mac_len); - skb_set_mac_header(skb, -skb->mac_len); + skb_mac_header_rebuild(skb); xfrm4_beet_make_header(skb); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 534972e114ac..ed4bf11ef9f4 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - const unsigned char *old_mac; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) @@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cf2cf62f33fc..8a4bf719c253 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -433,6 +433,10 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); + /* Join all-router multicast group if forwarding is set */ + if (ndev->cnf.forwarding && dev && (dev->flags & IFF_MULTICAST)) + ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); + return ndev; } @@ -656,7 +660,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, * layer address of our nexhop router */ - if (rt->rt6i_nexthop == NULL) + if (dst_get_neighbour_raw(&rt->dst) == NULL) ifa->flags &= ~IFA_F_OPTIMISTIC; ifa->idev = idev; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 2195ae651923..4c0f894d0843 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -324,8 +324,6 @@ static void ah6_output_done(struct crypto_async_request *base, int err) #endif } - err = ah->nexthdr; - kfree(AH_SKB_CB(skb)->tmp); xfrm_output_resume(skb, err); } @@ -466,12 +464,12 @@ static void ah6_input_done(struct crypto_async_request *base, int err) if (err) goto out; + err = ah->nexthdr; + skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, hdr_len); __skb_pull(skb, ah_hlen + hdr_len); skb_set_transport_header(skb, -hdr_len); - - err = ah->nexthdr; out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); @@ -583,8 +581,6 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (err == -EINPROGRESS) goto out; - if (err == -EBUSY) - err = NET_XMIT_DROP; goto out_free; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 1ac7938dd9ec..65dd5433f08b 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -411,19 +411,15 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); u32 align = max_t(u32, blksize, esp->padlen); - u32 rem; + unsigned int net_adj; - mtu -= x->props.header_len + crypto_aead_authsize(esp->aead); - rem = mtu & (align - 1); - mtu &= ~(align - 1); - - if (x->props.mode != XFRM_MODE_TUNNEL) { - u32 padsize = ((blksize - 1) & 7) + 1; - mtu -= blksize - padsize; - mtu += min_t(u32, blksize - padsize, rem); - } + if (x->props.mode != XFRM_MODE_TUNNEL) + net_adj = sizeof(struct ipv6hdr); + else + net_adj = 0; - return mtu - 2; + return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - + net_adj) & ~(align - 1)) + (net_adj - 2); } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 4076a0b14b20..0f9b37a1c1d4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && - (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) { + (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e17596b8407a..ae9f6d436171 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -100,6 +100,8 @@ static int ip6_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + struct neighbour *neigh; + int res; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -134,10 +136,22 @@ static int ip6_finish_output2(struct sk_buff *skb) skb->len); } - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); + rcu_read_lock(); + if (dst->hh) { + res = neigh_hh_output(dst->hh, skb); + + rcu_read_unlock(); + return res; + } else { + neigh = dst_get_neighbour(dst); + if (neigh) { + res = neigh->output(skb); + + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); + } IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -385,6 +399,7 @@ int ip6_forward(struct sk_buff *skb) struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); + struct neighbour *n; u32 mtu; if (net->ipv6.devconf_all->forwarding == 0) @@ -459,11 +474,10 @@ int ip6_forward(struct sk_buff *skb) send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. */ - if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && - !skb_sec_path(skb)) { + n = dst_get_neighbour(dst); + if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; - struct neighbour *n = dst->neighbour; /* * incoming and outgoing devices are the same @@ -949,8 +963,11 @@ out: static int ip6_dst_lookup_tail(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { - int err; struct net *net = sock_net(sk); +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + struct neighbour *n; +#endif + int err; if (*dst == NULL) *dst = ip6_route_output(net, sk, fl6); @@ -976,11 +993,14 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ - if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { + rcu_read_lock(); + n = dst_get_neighbour(*dst); + if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; + rcu_read_unlock(); ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); @@ -1000,6 +1020,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, if ((err = (*dst)->error)) goto out_err_release; } + } else { + rcu_read_unlock(); } #endif @@ -1172,6 +1194,29 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } +static void ip6_append_data_mtu(int *mtu, + int *maxfraglen, + unsigned int fragheaderlen, + struct sk_buff *skb, + struct rt6_info *rt) +{ + if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { + if (skb == NULL) { + /* first fragment, reserve header_len */ + *mtu = *mtu - rt->dst.header_len; + + } else { + /* + * this fragment is not first, the headers + * space is regarded as data space. + */ + *mtu = dst_mtu(rt->dst.path); + } + *maxfraglen = ((*mtu - fragheaderlen) & ~7) + + fragheaderlen - sizeof(struct frag_hdr); + } +} + int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, @@ -1181,7 +1226,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; - struct sk_buff *skb; + struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen; int exthdrlen; int hh_len; @@ -1238,8 +1283,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, inet->cork.fl.u.ip6 = *fl6; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; @@ -1334,38 +1383,43 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; - struct sk_buff *skb_prev; alloc_new_skb: - skb_prev = skb; - /* There's no room in the current skb */ - if (skb_prev) - fraggap = skb_prev->len - maxfraglen; + if (skb) + fraggap = skb->len - maxfraglen; else fraggap = 0; + /* update mtu and maxfraglen if necessary */ + if (skb == NULL || skb_prev == NULL) + ip6_append_data_mtu(&mtu, &maxfraglen, + fragheaderlen, skb, rt); + + skb_prev = skb; /* * If remaining data exceeds the mtu, * we know we need more fragment(s). */ datalen = length + fraggap; - if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) - datalen = maxfraglen - fragheaderlen; - fraglen = datalen + fragheaderlen; + if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) + datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; - /* - * The last fragment gets additional space at tail. - * Note: we overallocate on fragments with MSG_MODE - * because we have no idea if we're the last one. - */ - if (datalen == length + fraggap) - alloclen += rt->dst.trailer_len; + if (datalen != length + fraggap) { + /* + * this is not the last fragment, the trailer + * space is regarded as data space. + */ + datalen += rt->dst.trailer_len; + } + + alloclen += rt->dst.trailer_len; + fraglen = datalen + fragheaderlen; /* * We just reserve space for fragment header. diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 82a809901f8e..86e3cc10fc2e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -696,8 +696,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, int err; err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; @@ -2051,8 +2053,10 @@ int ip6_mr_input(struct sk_buff *skb) int err; err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ee7839f4d6e3..f2d74ea19a76 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -257,7 +257,6 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, if (rt) { dev = rt->rt6i_dev; - dev_hold(dev); dst_release(&rt->dst); } } else @@ -2055,7 +2054,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!delta) pmc->mca_sfcount[sfmode]--; for (j=0; j<i; j++) - (void) ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]); + ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) { struct ip6_sf_list *psf; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7596f071d308..10a8d411707e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1244,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev); if (rt) - neigh = rt->rt6i_nexthop; + neigh = dst_get_neighbour(&rt->dst); if (rt && lifetime == 0) { neigh_clone(neigh); @@ -1265,7 +1265,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - neigh = rt->rt6i_nexthop; + neigh = dst_get_neighbour(&rt->dst); if (neigh == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() got default router without neighbour.\n", diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0ef1f086feb8..8e600f827fe7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -356,7 +356,7 @@ out: #ifdef CONFIG_IPV6_ROUTER_PREF static void rt6_probe(struct rt6_info *rt) { - struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; + struct neighbour *neigh; /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -365,8 +365,10 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ + rcu_read_lock(); + neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) - return; + goto out; read_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { @@ -379,8 +381,11 @@ static void rt6_probe(struct rt6_info *rt) target = (struct in6_addr *)&neigh->primary_key; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); - } else + } else { read_unlock_bh(&neigh->lock); + } +out: + rcu_read_unlock(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -404,8 +409,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) static inline int rt6_check_neigh(struct rt6_info *rt) { - struct neighbour *neigh = rt->rt6i_nexthop; + struct neighbour *neigh; int m; + + rcu_read_lock(); + neigh = dst_get_neighbour(&rt->dst); if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -422,6 +430,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) read_unlock_bh(&neigh->lock); } else m = 0; + rcu_read_unlock(); return m; } @@ -745,8 +754,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_add dst_free(&rt->dst); return NULL; } - rt->rt6i_nexthop = neigh; - + dst_set_neighbour(&rt->dst, neigh); } return rt; @@ -760,7 +768,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, const struct in6_a rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; rt->dst.flags |= DST_HOST; - rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); + dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); } return rt; } @@ -794,7 +802,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -1058,7 +1066,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, } rt->rt6i_idev = idev; - rt->rt6i_nexthop = neigh; + dst_set_neighbour(&rt->dst, neigh); atomic_set(&rt->dst.__refcnt, 1); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); rt->dst.output = ip6_output; @@ -1338,12 +1346,12 @@ int ip6_route_add(struct fib6_config *cfg) rt->rt6i_prefsrc.plen = 0; if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { - rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); - if (IS_ERR(rt->rt6i_nexthop)) { - err = PTR_ERR(rt->rt6i_nexthop); - rt->rt6i_nexthop = NULL; + struct neighbour *neigh = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); + if (IS_ERR(neigh)) { + err = PTR_ERR(neigh); goto out; } + dst_set_neighbour(&rt->dst, neigh); } rt->rt6i_flags = cfg->fc_flags; @@ -1574,7 +1582,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == rt->dst.neighbour) + if (neigh == dst_get_neighbour_raw(&rt->dst)) goto out; nrt = ip6_rt_copy(rt); @@ -1590,7 +1598,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, nrt->dst.flags |= DST_HOST; ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); - nrt->rt6i_nexthop = neigh_clone(neigh); + dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); if (ip6_ins_rt(nrt)) goto out; @@ -1670,7 +1678,7 @@ again: 1. It is connected route. Action: COW 2. It is gatewayed route or NONEXTHOP route. Action: clone it. */ - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, daddr, saddr); else nrt = rt6_alloc_clone(rt, daddr); @@ -2035,7 +2043,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, return ERR_CAST(neigh); } - rt->rt6i_nexthop = neigh; + dst_set_neighbour(&rt->dst, neigh); ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; @@ -2312,6 +2320,7 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; + struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2400,8 +2409,15 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - if (rt->dst.neighbour) - NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key); + rcu_read_lock(); + n = dst_get_neighbour(&rt->dst); + if (n) { + if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { + rcu_read_unlock(); + goto nla_put_failure; + } + } + rcu_read_unlock(); if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); @@ -2585,6 +2601,7 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; + struct neighbour *n; seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); @@ -2593,12 +2610,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - - if (rt->rt6i_nexthop) { - seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); + rcu_read_lock(); + n = dst_get_neighbour(&rt->dst); + if (n) { + seq_printf(m, "%pi6", n->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } + rcu_read_unlock(); seq_printf(m, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->dst.__use, rt->rt6i_flags, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1cca5761aea9..f56acd096598 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -263,6 +263,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (register_netdevice(dev) < 0) goto failed_free; + strcpy(nt->parms.name, dev->name); + dev_hold(dev); ipip6_tunnel_link(sitn, nt); @@ -677,7 +679,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct neighbour *neigh = NULL; if (skb_dst(skb)) - neigh = skb_dst(skb)->neighbour; + neigh = dst_get_neighbour(skb_dst(skb)); if (neigh == NULL) { if (net_ratelimit()) @@ -702,7 +704,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct neighbour *neigh = NULL; if (skb_dst(skb)) - neigh = skb_dst(skb)->neighbour; + neigh = dst_get_neighbour(skb_dst(skb)); if (neigh == NULL) { if (net_ratelimit()) @@ -1141,7 +1143,6 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -1204,6 +1205,7 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea static int __net_init sit_init_net(struct net *net) { struct sit_net *sitn = net_generic(net, sit_net_id); + struct ip_tunnel *t; int err; sitn->tunnels[0] = sitn->tunnels_wc; @@ -1228,6 +1230,9 @@ static int __net_init sit_init_net(struct net *net) if ((err = register_netdev(sitn->fb_tunnel_dev))) goto err_reg_dev; + t = netdev_priv(sitn->fb_tunnel_dev); + + strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 296510a82c35..848f9634bbdf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1096,7 +1096,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG if (sk) - key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr); + key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr); #endif if (th->ack) @@ -1514,6 +1514,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); + if (tcp_sk(sk)->rx_opt.user_mss && + tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) + newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; + tcp_initialize_rcv_mss(newsk); newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 3437d7d4eed6..f37cba9e6891 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -80,7 +80,6 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *ip6h; - const unsigned char *old_mac; int size = sizeof(struct ipv6hdr); int err; @@ -90,10 +89,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) __skb_push(skb, size); skb_reset_network_header(skb); - - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); + skb_mac_header_rebuild(skb); xfrm6_beet_make_header(skb); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 4d6edff0498f..23ecd68a5e62 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -63,7 +63,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - const unsigned char *old_mac; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) goto out; @@ -80,10 +79,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip6_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index d2726a74597d..3c55f633928e 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -167,6 +167,7 @@ static void l2tp_eth_delete(struct l2tp_session *session) if (dev) { unregister_netdev(dev); spriv->dev = NULL; + module_put(THIS_MODULE); } } } @@ -254,6 +255,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p if (rc < 0) goto out_del_dev; + __module_get(THIS_MODULE); /* Must be done after register_netdev() */ strlcpy(session->ifname, dev->name, IFNAMSIZ); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index b6466e71f5e1..78bc442b2b6f 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -251,9 +251,16 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr; - int ret = -EINVAL; + int ret; int chk_addr_ret; + if (!sock_flag(sk, SOCK_ZAPPED)) + return -EINVAL; + if (addr_len < sizeof(struct sockaddr_l2tpip)) + return -EINVAL; + if (addr->l2tp_family != AF_INET) + return -EINVAL; + ret = -EADDRINUSE; read_lock_bh(&l2tp_ip_lock); if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id)) @@ -283,6 +290,8 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_del_node_init(sk); write_unlock_bh(&l2tp_ip_lock); ret = 0; + sock_reset_flag(sk, SOCK_ZAPPED); + out: release_sock(sk); @@ -303,13 +312,14 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len __be32 saddr; int oif, rc; - rc = -EINVAL; + if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */ + return -EINVAL; + if (addr_len < sizeof(*lsa)) - goto out; + return -EINVAL; - rc = -EAFNOSUPPORT; if (lsa->l2tp_family != AF_INET) - goto out; + return -EAFNOSUPPORT; lock_sock(sk); @@ -363,6 +373,14 @@ out: return rc; } +static int l2tp_ip_disconnect(struct sock *sk, int flags) +{ + if (sock_flag(sk, SOCK_ZAPPED)) + return 0; + + return udp_disconnect(sk, flags); +} + static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { @@ -393,11 +411,6 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) { int rc; - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) - goto drop; - - nf_reset(skb); - /* Charge it to the socket, dropping if the queue is full. */ rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) @@ -446,8 +459,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m daddr = lip->l2tp_addr.s_addr; } else { + rc = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) - return -EDESTADDRREQ; + goto out; daddr = inet->inet_daddr; connected = 1; @@ -595,7 +609,7 @@ static struct proto l2tp_ip_prot = { .close = l2tp_ip_close, .bind = l2tp_ip_bind, .connect = l2tp_ip_connect, - .disconnect = udp_disconnect, + .disconnect = l2tp_ip_disconnect, .ioctl = udp_ioctl, .destroy = l2tp_ip_destroy_sock, .setsockopt = ip_setsockopt, diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 39a21d0c61c4..13f9868e6949 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -908,7 +908,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, goto end_put_sess; } - inet = inet_sk(sk); + inet = inet_sk(tunnel->sock); if (tunnel->version == 2) { struct sockaddr_pppol2tp sp; len = sizeof(sp); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index dfd3a648a551..a18e6c3d36e3 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -833,15 +833,15 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, copied += used; len -= used; + /* For non stream protcols we get one packet per recvmsg call */ + if (sk->sk_type != SOCK_STREAM) + goto copy_uaddr; + if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, 0); *seq = 0; } - /* For non stream protcols we get one packet per recvmsg call */ - if (sk->sk_type != SOCK_STREAM) - goto copy_uaddr; - /* Partial read */ if (used + offset < skb->len) continue; @@ -857,6 +857,12 @@ copy_uaddr: } if (llc_sk(sk)->cmsg_flags) llc_cmsg_rcv(msg, skb); + + if (!(flags & MSG_PEEK)) { + sk_eat_skb(sk, skb, 0); + *seq = 0; + } + goto out; } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 9c0d76cdca92..1a41b1423d24 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -48,6 +48,8 @@ static void ieee80211_free_tid_rx(struct rcu_head *h) container_of(h, struct tid_ampdu_rx, rcu_head); int i; + del_timer_sync(&tid_rx->reorder_timer); + for (i = 0; i < tid_rx->buf_size; i++) dev_kfree_skb(tid_rx->reorder_buf[i]); kfree(tid_rx->reorder_buf); @@ -87,7 +89,6 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, tid, 0, reason); del_timer_sync(&tid_rx->session_timer); - del_timer_sync(&tid_rx->reorder_timer); call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index db7db43ccf42..b7f4f5c1f693 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -304,6 +304,38 @@ ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid) __release(agg_queue); } +/* + * splice packets from the STA's pending to the local pending, + * requires a call to ieee80211_agg_splice_finish later + */ +static void __acquires(agg_queue) +ieee80211_agg_splice_packets(struct ieee80211_local *local, + struct tid_ampdu_tx *tid_tx, u16 tid) +{ + int queue = ieee80211_ac_from_tid(tid); + unsigned long flags; + + ieee80211_stop_queue_agg(local, tid); + + if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" + " from the pending queue\n", tid)) + return; + + if (!skb_queue_empty(&tid_tx->pending)) { + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + /* copy over remaining packets */ + skb_queue_splice_tail_init(&tid_tx->pending, + &local->pending[queue]); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + } +} + +static void __releases(agg_queue) +ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) +{ + ieee80211_wake_queue_agg(local, tid); +} + void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; @@ -315,19 +347,17 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* - * While we're asking the driver about the aggregation, - * stop the AC queue so that we don't have to worry - * about frames that came in while we were doing that, - * which would require us to put them to the AC pending - * afterwards which just makes the code more complex. + * Start queuing up packets for this aggregation session. + * We're going to release them once the driver is OK with + * that. */ - ieee80211_stop_queue_agg(local, tid); - clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); /* - * make sure no packets are being processed to get - * valid starting sequence number + * Make sure no packets are being processed. This ensures that + * we have a valid starting sequence number and that in-flight + * packets have been flushed out and no packets for this TID + * will go into the driver during the ampdu_action call. */ synchronize_net(); @@ -341,17 +371,15 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) " tid %d\n", tid); #endif spin_lock_bh(&sta->lock); + ieee80211_agg_splice_packets(local, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); + ieee80211_agg_splice_finish(local, tid); spin_unlock_bh(&sta->lock); - ieee80211_wake_queue_agg(local, tid); kfree_rcu(tid_tx, rcu_head); return; } - /* we can take packets again now */ - ieee80211_wake_queue_agg(local, tid); - /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); #ifdef CONFIG_MAC80211_HT_DEBUG @@ -471,38 +499,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, } EXPORT_SYMBOL(ieee80211_start_tx_ba_session); -/* - * splice packets from the STA's pending to the local pending, - * requires a call to ieee80211_agg_splice_finish later - */ -static void __acquires(agg_queue) -ieee80211_agg_splice_packets(struct ieee80211_local *local, - struct tid_ampdu_tx *tid_tx, u16 tid) -{ - int queue = ieee80211_ac_from_tid(tid); - unsigned long flags; - - ieee80211_stop_queue_agg(local, tid); - - if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" - " from the pending queue\n", tid)) - return; - - if (!skb_queue_empty(&tid_tx->pending)) { - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - /* copy over remaining packets */ - skb_queue_splice_tail_init(&tid_tx->pending, - &local->pending[queue]); - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - } -} - -static void __releases(agg_queue) -ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) -{ - ieee80211_wake_queue_agg(local, tid); -} - static void ieee80211_agg_tx_operational(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 895eec19f2e2..65f3764c5aa2 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -498,6 +498,18 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_configure_filter(local); break; default: + mutex_lock(&local->mtx); + if (local->hw_roc_dev == sdata->dev && + local->hw_roc_channel) { + /* ignore return value since this is racy */ + drv_cancel_remain_on_channel(local); + ieee80211_queue_work(&local->hw, &local->hw_roc_done); + } + mutex_unlock(&local->mtx); + + flush_work(&local->hw_roc_start); + flush_work(&local->hw_roc_done); + flush_work(&sdata->work); /* * When we get here, the interface is marked down. diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 866f269183cf..1e36fb3318cb 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -910,6 +910,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", result); + ieee80211_led_init(local); + rtnl_lock(); result = ieee80211_init_rate_ctrl_alg(local, @@ -931,8 +933,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) rtnl_unlock(); - ieee80211_led_init(local); - local->network_latency_notifier.notifier_call = ieee80211_max_network_latency; result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 13427b194ced..c55eb9d8ea55 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -251,6 +251,22 @@ static void ieee80211_hw_roc_done(struct work_struct *work) return; } + /* was never transmitted */ + if (local->hw_roc_skb) { + u64 cookie; + + cookie = local->hw_roc_cookie ^ 2; + + cfg80211_mgmt_tx_status(local->hw_roc_dev, cookie, + local->hw_roc_skb->data, + local->hw_roc_skb->len, false, + GFP_KERNEL); + + kfree_skb(local->hw_roc_skb); + local->hw_roc_skb = NULL; + local->hw_roc_skb_for_status = NULL; + } + if (!local->hw_roc_for_tx) cfg80211_remain_on_channel_expired(local->hw_roc_dev, local->hw_roc_cookie, diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 3d5a2cb835c4..816590b0d7f2 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -314,7 +314,7 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { info->control.rates[i].idx = -1; info->control.rates[i].flags = 0; - info->control.rates[i].count = 1; + info->control.rates[i].count = 0; } if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 378bd67334ba..41000650f4a0 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -610,7 +610,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw, index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; if (!tid_agg_rx->reorder_buf[index] && - tid_agg_rx->stored_mpdu_num > 1) { + tid_agg_rx->stored_mpdu_num) { /* * No buffers ready to be released, but check whether any * frames in the reorder buffer have timed out. diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3104c844b544..da878c14182c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1222,7 +1222,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->sta = rcu_dereference(sdata->u.vlan.sta); if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr) return TX_DROP; - } else if (info->flags & IEEE80211_TX_CTL_INJECTED) { + } else if (info->flags & IEEE80211_TX_CTL_INJECTED || + tx->sdata->control_port_protocol == tx->skb->protocol) { tx->sta = sta_info_get_bss(sdata, hdr->addr1); } if (!tx->sta) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 2124db8e72be..11d9d49f22dc 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1254,6 +1254,12 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } + /* add back keys */ + list_for_each_entry(sdata, &local->interfaces, list) + if (ieee80211_sdata_running(sdata)) + ieee80211_enable_keys(sdata); + + wake_up: /* * Clear the WLAN_STA_BLOCK_BA flag so new aggregation * sessions can be established after a resume. @@ -1275,12 +1281,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); } - /* add back keys */ - list_for_each_entry(sdata, &local->interfaces, list) - if (ieee80211_sdata_running(sdata)) - ieee80211_enable_keys(sdata); - - wake_up: ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_SUSPEND); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 8f6a302d2ac3..aa1c40ab6a7c 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -109,7 +109,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_MMIC_ERROR) goto mic_fail; - if (!(status->flag & RX_FLAG_IV_STRIPPED)) + if (!(status->flag & RX_FLAG_IV_STRIPPED) && rx->key) goto update_iv; return RX_CONTINUE; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 24c28d238dcb..0787bed04180 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -233,6 +233,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, __be16 dport = 0; /* destination port to forward */ unsigned int flags; struct ip_vs_conn_param param; + const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; union nf_inet_addr snet; /* source network of the client, after masking */ @@ -268,7 +269,6 @@ ip_vs_sched_persist(struct ip_vs_service *svc, { int protocol = iph.protocol; const union nf_inet_addr *vaddr = &iph.daddr; - const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; __be16 vport = 0; if (dst_port == svc->port) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6ef64adf7362..24bc620b539a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -830,12 +830,19 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, return 0; } -int netlink_sendskb(struct sock *sk, struct sk_buff *skb) +static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, len); + return len; +} + +int netlink_sendskb(struct sock *sk, struct sk_buff *skb) +{ + int len = __netlink_sendskb(sk, skb); + sock_put(sk); return len; } @@ -960,8 +967,7 @@ static inline int netlink_broadcast_deliver(struct sock *sk, if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !test_bit(0, &nlk->state)) { skb_set_owner_r(skb, sk); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + __netlink_sendskb(sk, skb); return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf; } return -1; @@ -1682,10 +1688,8 @@ static int netlink_dump(struct sock *sk) if (sk_filter(sk, skb)) kfree_skb(skb); - else { - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); - } + else + __netlink_sendskb(sk, skb); return 0; } @@ -1697,10 +1701,8 @@ static int netlink_dump(struct sock *sk) if (sk_filter(sk, skb)) kfree_skb(skb); - else { - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); - } + else + __netlink_sendskb(sk, skb); if (cb->done) cb->done(cb); diff --git a/net/phonet/pep.c b/net/phonet/pep.c index f17fd841f948..d29a7fb3f61d 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1045,6 +1045,9 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, int flags = msg->msg_flags; int err, done; + if (len > USHRT_MAX) + return -EMSGSIZE; + if ((msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL| MSG_CMSG_COMPAT)) || !(msg->msg_flags & MSG_EOR)) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index bb6ad81b671d..424ff622ab5f 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -68,7 +68,6 @@ static int rds_release(struct socket *sock) { struct sock *sk = sock->sk; struct rds_sock *rs; - unsigned long flags; if (!sk) goto out; @@ -94,10 +93,10 @@ static int rds_release(struct socket *sock) rds_rdma_drop_keys(rs); rds_notify_queue_get(rs, NULL); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_del_init(&rs->rs_item); rds_sock_count--; - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); rds_trans_put(rs->rs_transport); @@ -409,7 +408,6 @@ static const struct proto_ops rds_proto_ops = { static int __rds_create(struct socket *sock, struct sock *sk, int protocol) { - unsigned long flags; struct rds_sock *rs; sock_init_data(sock, sk); @@ -426,10 +424,10 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) spin_lock_init(&rs->rs_rdma_lock); rs->rs_rdma_keys = RB_ROOT; - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_add_tail(&rs->rs_item, &rds_sock_list); rds_sock_count++; - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); return 0; } @@ -471,12 +469,11 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, { struct rds_sock *rs; struct rds_incoming *inc; - unsigned long flags; unsigned int total = 0; len /= sizeof(struct rds_info_message); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_for_each_entry(rs, &rds_sock_list, rs_item) { read_lock(&rs->rs_recv_lock); @@ -492,7 +489,7 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, read_unlock(&rs->rs_recv_lock); } - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); lens->nr = total; lens->each = sizeof(struct rds_info_message); @@ -504,11 +501,10 @@ static void rds_sock_info(struct socket *sock, unsigned int len, { struct rds_info_socket sinfo; struct rds_sock *rs; - unsigned long flags; len /= sizeof(struct rds_info_socket); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); if (len < rds_sock_count) goto out; @@ -529,7 +525,7 @@ out: lens->nr = rds_sock_count; lens->each = sizeof(struct rds_info_socket); - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); } static void rds_exit(void) diff --git a/net/rds/send.c b/net/rds/send.c index d58ae5f9339e..c803341f2844 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -932,7 +932,6 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) { - printk(KERN_INFO "msg_flags 0x%08X\n", msg->msg_flags); ret = -EOPNOTSUPP; goto out; } diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 178ff4f73c85..2679507ad333 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -96,11 +96,11 @@ static int rose_set_mac_address(struct net_device *dev, void *addr) struct sockaddr *sa = addr; int err; - if (!memcpy(dev->dev_addr, sa->sa_data, dev->addr_len)) + if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) return 0; if (dev->flags & IFF_UP) { - err = rose_add_loopback_node((rose_address *)dev->dev_addr); + err = rose_add_loopback_node((rose_address *)sa->sa_data); if (err) return err; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 06afbaeb4c88..178ee83175a4 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -225,8 +225,7 @@ struct choke_skb_cb { static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < - sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb)); + qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb)); return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data; } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index b9493a09a870..e1afe0c205fa 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -385,7 +385,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, struct gred_sched_data *q; if (table->tab[dp] == NULL) { - table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL); + table->tab[dp] = kzalloc(sizeof(*q), GFP_ATOMIC); if (table->tab[dp] == NULL) return -ENOMEM; } @@ -544,11 +544,8 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) opt.packets = q->packetsin; opt.bytesin = q->bytesin; - if (gred_wred_mode(table)) { - q->parms.qidlestart = - table->tab[table->def]->parms.qidlestart; - q->parms.qavg = table->tab[table->def]->parms.qavg; - } + if (gred_wred_mode(table)) + gred_load_wred_set(table, q); opt.qave = red_calc_qavg(&q->parms, q->parms.qavg); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ea17cbed29ef..59b26b8ff4b0 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -106,7 +106,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; - if (nla_len(opt) < sizeof(*qopt)) + if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index eb3b9a86c6ed..f0913ffc18d6 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -118,8 +118,7 @@ struct netem_skb_cb { static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < - sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); + qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb)); return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; } @@ -352,10 +351,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb))) { - sch->qstats.drops++; - return NET_XMIT_DROP; - } + skb_checksum_help(skb))) + return qdisc_drop(skb, sch); skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); } @@ -383,8 +380,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->counter = 0; __skb_queue_head(&q->qdisc->q, skb); - q->qdisc->qstats.backlog += qdisc_pkt_len(skb); - q->qdisc->qstats.requeues++; + sch->qstats.backlog += qdisc_pkt_len(skb); + sch->qstats.requeues++; ret = NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 0a833d0c1f61..47ee29fad350 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -93,8 +93,7 @@ struct sfb_skb_cb { static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < - sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb)); + qdisc_cb_private_validate(skb, sizeof(struct sfb_skb_cb)); return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 45cd30098e34..4f4c52c0eeb3 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -225,11 +225,11 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int -__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) +__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, + struct net_device *dev, struct netdev_queue *txq, + struct neighbour *mn) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); - struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); - struct neighbour *mn = skb_dst(skb)->neighbour; + struct teql_sched_data *q = qdisc_priv(txq->qdisc); struct neighbour *n = q->ncache; if (mn->tbl == NULL) @@ -262,17 +262,26 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * } static inline int teql_resolve(struct sk_buff *skb, - struct sk_buff *skb_res, struct net_device *dev) + struct sk_buff *skb_res, + struct net_device *dev, + struct netdev_queue *txq) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); + struct dst_entry *dst = skb_dst(skb); + struct neighbour *mn; + int res; + if (txq->qdisc == &noop_qdisc) return -ENODEV; - if (dev->header_ops == NULL || - skb_dst(skb) == NULL || - skb_dst(skb)->neighbour == NULL) + if (!dev->header_ops || !dst) return 0; - return __teql_resolve(skb, skb_res, dev); + + rcu_read_lock(); + mn = dst_get_neighbour(dst); + res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0; + rcu_read_unlock(); + + return res; } static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) @@ -307,7 +316,7 @@ restart: continue; } - switch (teql_resolve(skb, skb_res, slave)) { + switch (teql_resolve(skb, skb_res, slave, slave_txq)) { case 0: if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 4a62888f2e43..17a6e658a4ca 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -173,7 +173,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - (unsigned long)sp->autoclose * HZ; + min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) diff --git a/net/sctp/output.c b/net/sctp/output.c index 08b3cead6503..8fc4dcd294ab 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -377,9 +377,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) */ skb_set_owner_w(nskb, sk); - /* The 'obsolete' field of dst is set to 2 when a dst is freed. */ - if (!dst || (dst->obsolete > 1)) { - dst_release(dst); + if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) { sctp_assoc_sync_pmtu(asoc); @@ -697,13 +695,7 @@ static void sctp_packet_append_data(struct sctp_packet *packet, /* Keep track of how many bytes are in flight to the receiver. */ asoc->outqueue.outstanding_bytes += datasize; - /* Update our view of the receiver's rwnd. Include sk_buff overhead - * while updating peer.rwnd so that it reduces the chances of a - * receiver running out of receive buffer space even when receive - * window is still open. This can happen when a sender is sending - * sending small messages. - */ - datasize += sizeof(struct sk_buff); + /* Update our view of the receiver's rwnd. */ if (datasize < rwnd) rwnd -= datasize; else diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index d03682109b7a..1f2938fbf9b7 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -411,8 +411,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, chunk->transport->flight_size -= sctp_data_size(chunk); q->outstanding_bytes -= sctp_data_size(chunk); - q->asoc->peer.rwnd += (sctp_data_size(chunk) + - sizeof(struct sk_buff)); + q->asoc->peer.rwnd += sctp_data_size(chunk); } continue; } @@ -432,8 +431,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, * (Section 7.2.4)), add the data size of those * chunks to the rwnd. */ - q->asoc->peer.rwnd += (sctp_data_size(chunk) + - sizeof(struct sk_buff)); + q->asoc->peer.rwnd += sctp_data_size(chunk); q->outstanding_bytes -= sctp_data_size(chunk); if (chunk->transport) transport->flight_size -= sctp_data_size(chunk); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 207175b2f40a..946afd6045c3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1144,6 +1144,9 @@ SCTP_STATIC __init int sctp_init(void) sctp_max_instreams = SCTP_DEFAULT_INSTREAMS; sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS; + /* Initialize maximum autoclose timeout. */ + sctp_max_autoclose = INT_MAX / HZ; + /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d3ccf7973c59..4434853a9fe7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2129,8 +2129,6 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, return -EINVAL; if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; - /* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */ - sp->autoclose = min_t(long, sp->autoclose, MAX_SCHEDULE_TIMEOUT / HZ); return 0; } @@ -4011,9 +4009,10 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len, static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval, int __user *optlen) { - if (len < sizeof(struct sctp_event_subscribe)) + if (len <= 0) return -EINVAL; - len = sizeof(struct sctp_event_subscribe); + if (len > sizeof(struct sctp_event_subscribe)) + len = sizeof(struct sctp_event_subscribe); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &sctp_sk(sk)->subscribe, len)) diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 50cb57f0919e..6752f489febf 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -53,6 +53,10 @@ static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ static int rwnd_scale_max = 16; +static unsigned long max_autoclose_min = 0; +static unsigned long max_autoclose_max = + (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) + ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ; extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; @@ -251,6 +255,15 @@ static ctl_table sctp_table[] = { .extra1 = &one, .extra2 = &rwnd_scale_max, }, + { + .procname = "max_autoclose", + .data = &sctp_max_autoclose, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &max_autoclose_min, + .extra2 = &max_autoclose_max, + }, { /* sentinel */ } }; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 394c57ca2f54..8da4481ed30a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -226,23 +226,6 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -/* this is a complete rip-off from __sk_dst_check - * the cookie is always 0 since this is how it's used in the - * pmtu code - */ -static struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) -{ - struct dst_entry *dst = t->dst; - - if (dst && dst->obsolete && dst->ops->check(dst, 0) == NULL) { - dst_release(t->dst); - t->dst = NULL; - return NULL; - } - - return dst; -} - void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst; diff --git a/net/socket.c b/net/socket.c index 1ad42d3604e4..cf41afcc89bb 100644 --- a/net/socket.c +++ b/net/socket.c @@ -791,9 +791,9 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, sock = file->private_data; - flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; - if (more) - flags |= MSG_MORE; + flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */ + flags |= more; return kernel_sendpage(sock, page, offset, size, flags); } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 72ad836e4fe0..4530a912b8b0 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -828,6 +828,8 @@ static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, { ssize_t ret; + if (count == 0) + return -EINVAL; if (copy_from_user(kaddr, buf, count)) return -EFAULT; kaddr[count] = '\0'; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4814e246a874..b6bb22571c57 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -480,14 +480,18 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next); */ void rpc_wake_up(struct rpc_wait_queue *queue) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); rpc_wake_up_task_queue_locked(queue, task); + } if (head == &queue->tasks[0]) break; head--; @@ -505,13 +509,16 @@ EXPORT_SYMBOL_GPL(rpc_wake_up); */ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) { + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); task->tk_status = status; rpc_wake_up_task_queue_locked(queue, task); } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2b90292e9505..ce5f111fe325 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -167,6 +167,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools) fail_free: kfree(m->to_pool); + m->to_pool = NULL; fail: return -ENOMEM; } @@ -287,7 +288,9 @@ svc_pool_map_put(void) if (!--m->count) { m->mode = SVC_POOL_DEFAULT; kfree(m->to_pool); + m->to_pool = NULL; kfree(m->pool_to); + m->pool_to = NULL; m->npools = 0; } @@ -472,17 +475,20 @@ svc_destroy(struct svc_serv *serv) printk("svc_destroy: no threads for serv=%p!\n", serv); del_timer_sync(&serv->sv_temptimer); - - svc_close_all(&serv->sv_tempsocks); + /* + * The set of xprts (contained in the sv_tempsocks and + * sv_permsocks lists) is now constant, since it is modified + * only by accepting new sockets (done by service threads in + * svc_recv) or aging old ones (done by sv_temptimer), or + * configuration changes (excluded by whatever locking the + * caller is using--nfsd_mutex in the case of nfsd). So it's + * safe to traverse those lists and shut everything down: + */ + svc_close_all(serv); if (serv->sv_shutdown) serv->sv_shutdown(serv); - svc_close_all(&serv->sv_permsocks); - - BUG_ON(!list_empty(&serv->sv_permsocks)); - BUG_ON(!list_empty(&serv->sv_tempsocks)); - cache_clean_deferred(serv); if (svc_serv_is_pooled(serv)) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bd31208bbb61..9d7ed0b48b51 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -901,14 +901,7 @@ void svc_delete_xprt(struct svc_xprt *xprt) spin_lock_bh(&serv->sv_lock); if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) list_del_init(&xprt->xpt_list); - /* - * The only time we're called while xpt_ready is still on a list - * is while the list itself is about to be destroyed (in - * svc_destroy). BUT svc_xprt_enqueue could still be attempting - * to add new entries to the sp_sockets list, so we can't leave - * a freed xprt on it. - */ - list_del_init(&xprt->xpt_ready); + BUG_ON(!list_empty(&xprt->xpt_ready)); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; spin_unlock_bh(&serv->sv_lock); @@ -936,22 +929,48 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -void svc_close_all(struct list_head *xprt_list) +static void svc_close_list(struct list_head *xprt_list) +{ + struct svc_xprt *xprt; + + list_for_each_entry(xprt, xprt_list, xpt_list) { + set_bit(XPT_CLOSE, &xprt->xpt_flags); + set_bit(XPT_BUSY, &xprt->xpt_flags); + } +} + +void svc_close_all(struct svc_serv *serv) { + struct svc_pool *pool; struct svc_xprt *xprt; struct svc_xprt *tmp; + int i; + + svc_close_list(&serv->sv_tempsocks); + svc_close_list(&serv->sv_permsocks); + for (i = 0; i < serv->sv_nrpools; i++) { + pool = &serv->sv_pools[i]; + + spin_lock_bh(&pool->sp_lock); + while (!list_empty(&pool->sp_sockets)) { + xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready); + list_del_init(&xprt->xpt_ready); + } + spin_unlock_bh(&pool->sp_lock); + } /* - * The server is shutting down, and no more threads are running. - * svc_xprt_enqueue() might still be running, but at worst it - * will re-add the xprt to sp_sockets, which will soon get - * freed. So we don't bother with any more locking, and don't - * leave the close to the (nonexistent) server threads: + * At this point the sp_sockets lists will stay empty, since + * svc_enqueue will not add new entries without taking the + * sp_lock and checking XPT_BUSY. */ - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { - set_bit(XPT_CLOSE, &xprt->xpt_flags); + list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list) svc_delete_xprt(xprt); - } + list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list) + svc_delete_xprt(xprt); + + BUG_ON(!list_empty(&serv->sv_permsocks)); + BUG_ON(!list_empty(&serv->sv_tempsocks)); } /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 924e4f4d6aa6..4e84e222a490 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1202,6 +1202,11 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) goto bad_res; } + if (!netif_running(netdev)) { + result = -ENETDOWN; + goto bad_res; + } + nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], rem_txq_params) { @@ -5506,7 +5511,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_get_key, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -5538,7 +5543,7 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .doit = nl80211_addset_beacon, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -5546,7 +5551,7 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .doit = nl80211_addset_beacon, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -5570,7 +5575,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_set_station, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -5586,7 +5591,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_del_station, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -5619,7 +5624,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_del_mpath, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -5627,7 +5632,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_set_bss, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -5653,7 +5658,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_get_mesh_config, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -5785,7 +5790,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_setdel_pmksa, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -5793,7 +5798,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_setdel_pmksa, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -5801,7 +5806,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_flush_pmksa, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -5889,7 +5894,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_set_wds_peer, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 72f7feee254a..9bcb6bc2ce0c 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -379,7 +379,15 @@ static void reg_regdb_query(const char *alpha2) schedule_work(®_regdb_work); } + +/* Feel free to add any other sanity checks here */ +static void reg_regdb_size_check(void) +{ + /* We should ideally BUILD_BUG_ON() but then random builds would fail */ + WARN_ONCE(!reg_regdb_size, "db.txt is empty, you should update it..."); +} #else +static inline void reg_regdb_size_check(void) {} static inline void reg_regdb_query(const char *alpha2) {} #endif /* CONFIG_CFG80211_INTERNAL_REGDB */ @@ -2228,6 +2236,8 @@ int __init regulatory_init(void) spin_lock_init(®_requests_lock); spin_lock_init(®_pending_beacons_lock); + reg_regdb_size_check(); + cfg80211_regdomain = cfg80211_world_regdom; user_alpha2[0] = '9'; diff --git a/net/wireless/util.c b/net/wireless/util.c index 4d7b83fbc32f..30f68dc76ac0 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -937,6 +937,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { struct wireless_dev *wdev_iter; + u32 used_iftypes = BIT(iftype); int num[NUM_NL80211_IFTYPES]; int total = 1; int i, j; @@ -970,12 +971,14 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, num[wdev_iter->iftype]++; total++; + used_iftypes |= BIT(wdev_iter->iftype); } mutex_unlock(&rdev->devlist_mtx); for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { const struct ieee80211_iface_combination *c; struct ieee80211_iface_limit *limits; + u32 all_iftypes = 0; c = &rdev->wiphy.iface_combinations[i]; @@ -990,14 +993,28 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, if (rdev->wiphy.software_iftypes & BIT(iftype)) continue; for (j = 0; j < c->n_limits; j++) { - if (!(limits[j].types & iftype)) + all_iftypes |= limits[j].types; + if (!(limits[j].types & BIT(iftype))) continue; if (limits[j].max < num[iftype]) goto cont; limits[j].max -= num[iftype]; } } - /* yay, it fits */ + + /* + * Finally check that all iftypes that we're currently + * using are actually part of this combination. If they + * aren't then we can't use this combination and have + * to continue to the next. + */ + if ((all_iftypes & used_iftypes) != used_iftypes) + goto cont; + + /* + * This combination covered all interface types and + * supported the requested numbers, so we're good. + */ kfree(limits); return 0; cont: diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5ce74a385525..0c0e40e9cfc1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1497,7 +1497,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto free_dst; /* Copy neighbour for reachability confirmation */ - dst0->neighbour = neigh_clone(dst->neighbour); + dst_set_neighbour(dst0, neigh_clone(dst_get_neighbour(dst))); xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); xfrm_init_pmtu(dst_prev); @@ -1917,6 +1917,9 @@ no_transform: } ok: xfrm_pols_put(pols, drop_pols); + if (dst && dst->xfrm && + dst->xfrm->props.mode == XFRM_MODE_TUNNEL) + dst->flags |= DST_XFRM_TUNNEL; return dst; nopol: diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index b11ea692bd7d..3235023eaf4e 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -166,7 +166,7 @@ static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) } if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + x->repl->notify(x, XFRM_REPLAY_UPDATE); } static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) @@ -293,7 +293,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) } if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + x->repl->notify(x, XFRM_REPLAY_UPDATE); } static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) @@ -502,7 +502,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) } if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + x->repl->notify(x, XFRM_REPLAY_UPDATE); } static struct xfrm_replay xfrm_replay_legacy = { |