diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2012-02-09 16:13:19 -0500 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2012-02-13 11:06:13 -0800 |
commit | 8a533666d1591cf4ea596c6bd710e2fe682cb56a (patch) | |
tree | 0c525703fd834499c659490b3cb38835fde14f51 | |
parent | 323a479328cbc2fb5cf647790f7414ca570a577b (diff) |
net: fix NULL dereferences in check_peer_redir()
[ Upstream commit d3aaeb38c40e5a6c08dd31a1b64da65c4352be36, along
with dependent backports of commits:
69cce1d1404968f78b177a0314f5822d5afdbbfb
9de79c127cccecb11ae6a21ab1499e87aa222880
218fa90f072e4aeff9003d57e390857f4f35513e
580da35a31f91a594f3090b7a2c39b85cb051a12
f7e57044eeb1841847c24aa06766c8290c202583
e049f28883126c689cf95859480d9ee4ab23b7fa ]
Gergely Kalman reported crashes in check_peer_redir().
It appears commit f39925dbde778 (ipv4: Cache learned redirect
information in inetpeer.) added a race, leading to possible NULL ptr
dereference.
Since we can now change dst neighbour, we should make sure a reader can
safely use a neighbour.
Add RCU protection to dst neighbour, and make sure check_peer_redir()
can be called safely by different cpus in parallel.
As neighbours are already freed after one RCU grace period, this patch
should not add typical RCU penalty (cache cold effects)
Many thanks to Gergely for providing a pretty report pointing to the
bug.
Reported-by: Gergely Kalman <synapse@hippy.csoma.elte.hu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
32 files changed, 359 insertions, 190 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 8e21d457b899..f2a84c6f8543 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -215,7 +215,9 @@ static int addr4_resolve(struct sockaddr_in *src_in, neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->dst.dev); if (!neigh || !(neigh->nud_state & NUD_VALID)) { - neigh_event_send(rt->dst.neighbour, NULL); + rcu_read_lock(); + neigh_event_send(dst_get_neighbour(&rt->dst), NULL); + rcu_read_unlock(); ret = -ENODATA; if (neigh) goto release; @@ -273,14 +275,16 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, goto put; } - neigh = dst->neighbour; + rcu_read_lock(); + neigh = dst_get_neighbour(dst); if (!neigh || !(neigh->nud_state & NUD_VALID)) { - neigh_event_send(dst->neighbour, NULL); + if (neigh) + neigh_event_send(neigh, NULL); ret = -ENODATA; - goto put; + } else { + ret = rdma_copy_addr(addr, dst->dev, neigh->ha); } - - ret = rdma_copy_addr(addr, dst->dev, neigh->ha); + rcu_read_unlock(); put: dst_release(dst); return ret; diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 2332dc22aa04..e55ce7a428be 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1328,6 +1328,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct iwch_ep *child_ep, *parent_ep = ctx; struct cpl_pass_accept_req *req = cplhdr(skb); unsigned int hwtid = GET_TID(req); + struct neighbour *neigh; struct dst_entry *dst; struct l2t_entry *l2t; struct rtable *rt; @@ -1364,7 +1365,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) goto reject; } dst = &rt->dst; - l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev); + rcu_read_lock(); + neigh = dst_get_neighbour(dst); + l2t = t3_l2t_get(tdev, neigh, neigh->dev); + rcu_read_unlock(); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1874,10 +1878,11 @@ static int is_loopback_dst(struct iw_cm_id *cm_id) int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { - int err = 0; struct iwch_dev *h = to_iwch_dev(cm_id->device); + struct neighbour *neigh; struct iwch_ep *ep; struct rtable *rt; + int err = 0; if (is_loopback_dst(cm_id)) { err = -ENOSYS; @@ -1933,9 +1938,12 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } ep->dst = &rt->dst; + rcu_read_lock(); + neigh = dst_get_neighbour(ep->dst); + /* get a l2t entry */ - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour, - ep->dst->neighbour->dev); + ep->l2t = t3_l2t_get(ep->com.tdev, neigh, neigh->dev); + rcu_read_unlock(); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 31fb44085c9b..daa93e942e16 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1325,6 +1325,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid)); struct tid_info *t = dev->rdev.lldi.tids; unsigned int hwtid = GET_TID(req); + struct neighbour *neigh; struct dst_entry *dst; struct l2t_entry *l2t; struct rtable *rt; @@ -1357,11 +1358,12 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } dst = &rt->dst; - if (dst->neighbour->dev->flags & IFF_LOOPBACK) { + rcu_read_lock(); + neigh = dst_get_neighbour(dst); + if (neigh->dev->flags & IFF_LOOPBACK) { pdev = ip_dev_find(&init_net, peer_ip); BUG_ON(!pdev); - l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, dst->neighbour, - pdev, 0); + l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, pdev, 0); mtu = pdev->mtu; tx_chan = cxgb4_port_chan(pdev); smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; @@ -1372,18 +1374,18 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) rss_qid = dev->rdev.lldi.rxq_ids[cxgb4_port_idx(pdev) * step]; dev_put(pdev); } else { - l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, dst->neighbour, - dst->neighbour->dev, 0); + l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, neigh->dev, 0); mtu = dst_mtu(dst); - tx_chan = cxgb4_port_chan(dst->neighbour->dev); - smac_idx = (cxgb4_port_viid(dst->neighbour->dev) & 0x7F) << 1; + tx_chan = cxgb4_port_chan(neigh->dev); + smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1; step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan; - txq_idx = cxgb4_port_idx(dst->neighbour->dev) * step; - ctrlq_idx = cxgb4_port_idx(dst->neighbour->dev); + txq_idx = cxgb4_port_idx(neigh->dev) * step; + ctrlq_idx = cxgb4_port_idx(neigh->dev); step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(dst->neighbour->dev) * step]; + cxgb4_port_idx(neigh->dev) * step]; } + rcu_read_unlock(); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1847,6 +1849,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct c4iw_ep *ep; struct rtable *rt; struct net_device *pdev; + struct neighbour *neigh; int step; if ((conn_param->ord > c4iw_max_read_depth) || @@ -1908,14 +1911,16 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } ep->dst = &rt->dst; + rcu_read_lock(); + neigh = dst_get_neighbour(ep->dst); + /* get a l2t entry */ - if (ep->dst->neighbour->dev->flags & IFF_LOOPBACK) { + if (neigh->dev->flags & IFF_LOOPBACK) { PDBG("%s LOOPBACK\n", __func__); pdev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr); ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, - ep->dst->neighbour, - pdev, 0); + neigh, pdev, 0); ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; @@ -1930,21 +1935,20 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) dev_put(pdev); } else { ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, - ep->dst->neighbour, - ep->dst->neighbour->dev, 0); + neigh, neigh->dev, 0); ep->mtu = dst_mtu(ep->dst); - ep->tx_chan = cxgb4_port_chan(ep->dst->neighbour->dev); - ep->smac_idx = (cxgb4_port_viid(ep->dst->neighbour->dev) & - 0x7F) << 1; + ep->tx_chan = cxgb4_port_chan(neigh->dev); + ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1; step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan; - ep->txq_idx = cxgb4_port_idx(ep->dst->neighbour->dev) * step; - ep->ctrlq_idx = cxgb4_port_idx(ep->dst->neighbour->dev); + ep->txq_idx = cxgb4_port_idx(neigh->dev) * step; + ep->ctrlq_idx = cxgb4_port_idx(neigh->dev); step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan; ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(ep->dst->neighbour->dev) * step]; + cxgb4_port_idx(neigh->dev) * step]; } + rcu_read_unlock(); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2001f20a4361..23c04ff6519b 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1301,7 +1301,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, int is_eth; int is_vlan = 0; int is_grh; - u16 vlan; + u16 vlan = 0; send_size = 0; for (i = 0; i < wr->num_sge; ++i) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index e74cdf9ef471..a1f74f6381ba 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1150,9 +1150,11 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi neigh_release(neigh); } - if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID))) - neigh_event_send(rt->dst.neighbour, NULL); - + if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID))) { + rcu_read_lock(); + neigh_event_send(dst_get_neighbour(&rt->dst), NULL); + rcu_read_unlock(); + } ip_rt_put(rt); return rc; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 86addca9ddf6..a98c414978ec 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -555,14 +555,17 @@ static int path_rec_start(struct net_device *dev, return 0; } +/* called with rcu_read_lock */ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; struct ipoib_neigh *neigh; + struct neighbour *n; unsigned long flags; - neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour, skb->dev); + n = dst_get_neighbour(skb_dst(skb)); + neigh = ipoib_neigh_alloc(n, skb->dev); if (!neigh) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -571,9 +574,9 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, skb_dst(skb)->neighbour->ha + 4); + path = __path_find(dev, n->ha + 4); if (!path) { - path = path_rec_create(dev, skb_dst(skb)->neighbour->ha + 4); + path = path_rec_create(dev, n->ha + 4); if (!path) goto err_path; @@ -607,7 +610,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) } } else { spin_unlock_irqrestore(&priv->lock, flags); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha)); + ipoib_send(dev, skb, path->ah, IPOIB_QPN(n->ha)); return; } } else { @@ -634,20 +637,24 @@ err_drop: spin_unlock_irqrestore(&priv->lock, flags); } +/* called with rcu_read_lock */ static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(skb->dev); + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n; /* Look up path record for unicasts */ - if (skb_dst(skb)->neighbour->ha[4] != 0xff) { + n = dst_get_neighbour(dst); + if (n->ha[4] != 0xff) { neigh_add_path(skb, dev); return; } /* Add in the P_Key for multicasts */ - skb_dst(skb)->neighbour->ha[8] = (priv->pkey >> 8) & 0xff; - skb_dst(skb)->neighbour->ha[9] = priv->pkey & 0xff; - ipoib_mcast_send(dev, skb_dst(skb)->neighbour->ha + 4, skb); + n->ha[8] = (priv->pkey >> 8) & 0xff; + n->ha[9] = priv->pkey & 0xff; + ipoib_mcast_send(dev, n->ha + 4, skb); } static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, @@ -712,18 +719,23 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh; + struct neighbour *n = NULL; unsigned long flags; - if (likely(skb_dst(skb) && skb_dst(skb)->neighbour)) { - if (unlikely(!*to_ipoib_neigh(skb_dst(skb)->neighbour))) { + rcu_read_lock(); + if (likely(skb_dst(skb))) + n = dst_get_neighbour(skb_dst(skb)); + + if (likely(n)) { + if (unlikely(!*to_ipoib_neigh(n))) { ipoib_path_lookup(skb, dev); - return NETDEV_TX_OK; + goto unlock; } - neigh = *to_ipoib_neigh(skb_dst(skb)->neighbour); + neigh = *to_ipoib_neigh(n); if (unlikely((memcmp(&neigh->dgid.raw, - skb_dst(skb)->neighbour->ha + 4, + n->ha + 4, sizeof(union ib_gid))) || (neigh->dev != dev))) { spin_lock_irqsave(&priv->lock, flags); @@ -740,17 +752,17 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) ipoib_neigh_free(dev, neigh); spin_unlock_irqrestore(&priv->lock, flags); ipoib_path_lookup(skb, dev); - return NETDEV_TX_OK; + goto unlock; } if (ipoib_cm_get(neigh)) { if (ipoib_cm_up(neigh)) { ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); - return NETDEV_TX_OK; + goto unlock; } } else if (neigh->ah) { - ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha)); - return NETDEV_TX_OK; + ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(n->ha)); + goto unlock; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { @@ -784,13 +796,14 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) phdr->hwaddr + 4); dev_kfree_skb_any(skb); ++dev->stats.tx_dropped; - return NETDEV_TX_OK; + goto unlock; } unicast_arp_send(skb, dev, phdr); } } - +unlock: + rcu_read_unlock(); return NETDEV_TX_OK; } @@ -812,6 +825,8 @@ static int ipoib_hard_header(struct sk_buff *skb, const void *daddr, const void *saddr, unsigned len) { struct ipoib_header *header; + struct dst_entry *dst; + struct neighbour *n; header = (struct ipoib_header *) skb_push(skb, sizeof *header); @@ -823,7 +838,11 @@ static int ipoib_hard_header(struct sk_buff *skb, * destination address onto the front of the skb so we can * figure out where to send the packet later. */ - if ((!skb_dst(skb) || !skb_dst(skb)->neighbour) && daddr) { + dst = skb_dst(skb); + n = NULL; + if (dst) + n = dst_get_neighbour_raw(dst); + if ((!dst || !n) && daddr) { struct ipoib_pseudoheader *phdr = (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 3871ac663554..a8d2a891b840 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -258,11 +258,15 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n = NULL; + netif_tx_unlock_bh(dev); skb->dev = dev; - - if (!skb_dst(skb) || !skb_dst(skb)->neighbour) { + if (dst) + n = dst_get_neighbour_raw(dst); + if (!dst || !n) { /* put pseudoheader back on for next time */ skb_push(skb, sizeof (struct ipoib_pseudoheader)); } @@ -715,11 +719,15 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) out: if (mcast && mcast->ah) { - if (skb_dst(skb) && - skb_dst(skb)->neighbour && - !*to_ipoib_neigh(skb_dst(skb)->neighbour)) { - struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour, - skb->dev); + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n = NULL; + + rcu_read_lock(); + if (dst) + n = dst_get_neighbour(dst); + if (n && !*to_ipoib_neigh(n)) { + struct ipoib_neigh *neigh = ipoib_neigh_alloc(n, + skb->dev); if (neigh) { kref_get(&mcast->ah->ref); @@ -727,7 +735,7 @@ out: list_add_tail(&neigh->list, &mcast->neigh_list); } } - + rcu_read_unlock(); spin_unlock_irqrestore(&priv->lock, flags); ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); return; diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c index 3f2e12c3ac1b..015b5152b0de 100644 --- a/drivers/net/cxgb3/cxgb3_offload.c +++ b/drivers/net/cxgb3/cxgb3_offload.c @@ -971,7 +971,7 @@ static int nb_callback(struct notifier_block *self, unsigned long event, case (NETEVENT_REDIRECT):{ struct netevent_redirect *nr = ctx; cxgb_redirect(nr->old, nr->new); - cxgb_neigh_update(nr->new->neighbour); + cxgb_neigh_update(dst_get_neighbour(nr->new)); break; } default: @@ -1116,8 +1116,8 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new) struct l2t_entry *e; struct t3c_tid_entry *te; - olddev = old->neighbour->dev; - newdev = new->neighbour->dev; + olddev = dst_get_neighbour(old)->dev; + newdev = dst_get_neighbour(new)->dev; if (!is_offloading(olddev)) return; if (!is_offloading(newdev)) { @@ -1134,7 +1134,7 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new) } /* Add new L2T entry */ - e = t3_l2t_get(tdev, new->neighbour, newdev); + e = t3_l2t_get(tdev, dst_get_neighbour(new), newdev); if (!e) { printk(KERN_ERR "%s: couldn't allocate new l2t entry!\n", __func__); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index fd69da3fa6b4..e2c9ac5fcb36 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2742,9 +2742,14 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) int inline qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb) { int cast_type = RTN_UNSPEC; - - if (skb_dst(skb) && skb_dst(skb)->neighbour) { - cast_type = skb_dst(skb)->neighbour->type; + struct neighbour *n = NULL; + struct dst_entry *dst; + + dst = skb_dst(skb); + if (dst) + n = dst_get_neighbour(dst); + if (n) { + cast_type = n->type; if ((cast_type == RTN_BROADCAST) || (cast_type == RTN_MULTICAST) || (cast_type == RTN_ANYCAST)) @@ -2787,6 +2792,9 @@ int inline qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb) static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, struct sk_buff *skb, int ipv, int cast_type) { + struct neighbour *n = NULL; + struct dst_entry *dst; + memset(hdr, 0, sizeof(struct qeth_hdr)); hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3; hdr->hdr.l3.ext_flags = 0; @@ -2804,13 +2812,16 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, } hdr->hdr.l3.length = skb->len - sizeof(struct qeth_hdr); + dst = skb_dst(skb); + if (dst) + n = dst_get_neighbour(dst); if (ipv == 4) { /* IPv4 */ hdr->hdr.l3.flags = qeth_l3_get_qeth_hdr_flags4(cast_type); memset(hdr->hdr.l3.dest_addr, 0, 12); - if ((skb_dst(skb)) && (skb_dst(skb)->neighbour)) { + if (n) { *((u32 *) (&hdr->hdr.l3.dest_addr[12])) = - *((u32 *) skb_dst(skb)->neighbour->primary_key); + *((u32 *) n->primary_key); } else { /* fill in destination address used in ip header */ *((u32 *) (&hdr->hdr.l3.dest_addr[12])) = @@ -2821,9 +2832,9 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, hdr->hdr.l3.flags = qeth_l3_get_qeth_hdr_flags6(cast_type); if (card->info.type == QETH_CARD_TYPE_IQD) hdr->hdr.l3.flags &= ~QETH_HDR_PASSTHRU; - if ((skb_dst(skb)) && (skb_dst(skb)->neighbour)) { + if (n) { memcpy(hdr->hdr.l3.dest_addr, - skb_dst(skb)->neighbour->primary_key, 16); + n->primary_key, 16); } else { /* fill in destination address used in ip header */ memcpy(hdr->hdr.l3.dest_addr, diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c index b2d661147a43..143f2682bdab 100644 --- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c +++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c @@ -985,7 +985,7 @@ static int init_act_open(struct cxgbi_sock *csk) csk->saddr.sin_addr.s_addr = chba->ipv4addr; csk->rss_qid = 0; - csk->l2t = t3_l2t_get(t3dev, dst->neighbour, ndev); + csk->l2t = t3_l2t_get(t3dev, dst_get_neighbour(dst), ndev); if (!csk->l2t) { pr_err("NO l2t available.\n"); return -EINVAL; diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index f3a4cd7cf782..ae13c4993aa3 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1160,7 +1160,7 @@ static int init_act_open(struct cxgbi_sock *csk) cxgbi_sock_set_flag(csk, CTPF_HAS_ATID); cxgbi_sock_get(csk); - csk->l2t = cxgb4_l2t_get(lldi->l2t, csk->dst->neighbour, ndev, 0); + csk->l2t = cxgb4_l2t_get(lldi->l2t, dst_get_neighbour(csk->dst), ndev, 0); if (!csk->l2t) { pr_err("%s, cannot alloc l2t.\n", ndev->name); goto rel_resource; diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index a2a9c7c6c643..77ac217ad5ce 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -492,7 +492,7 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr) goto err_out; } dst = &rt->dst; - ndev = dst->neighbour->dev; + ndev = dst_get_neighbour(dst)->dev; if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { pr_info("multi-cast route %pI4, port %u, dev %s.\n", @@ -506,7 +506,7 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr) ndev = ip_dev_find(&init_net, daddr->sin_addr.s_addr); mtu = ndev->mtu; pr_info("rt dev %s, loopback -> %s, mtu %u.\n", - dst->neighbour->dev->name, ndev->name, mtu); + dst_get_neighbour(dst)->dev->name, ndev->name, mtu); } cdev = cxgbi_device_find_by_netdev(ndev, &port); diff --git a/include/net/arp.h b/include/net/arp.h index 91f0568a04ef..fb0eb9048b19 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -16,6 +16,7 @@ extern void arp_send(int type, int ptype, __be32 dest_ip, const unsigned char *dest_hw, const unsigned char *src_hw, const unsigned char *th); extern int arp_bind_neighbour(struct dst_entry *dst); +extern struct neighbour *__arp_bind_neighbour(struct dst_entry *dst, __be32 nexthop); extern int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); extern void arp_ifdown(struct net_device *dev); diff --git a/include/net/dst.h b/include/net/dst.h index e12ddfb9eb16..d0201340bee2 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -37,7 +37,7 @@ struct dst_entry { unsigned long _metrics; unsigned long expires; struct dst_entry *path; - struct neighbour *neighbour; + struct neighbour __rcu *_neighbour; struct hh_cache *hh; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; @@ -86,6 +86,21 @@ struct dst_entry { }; }; +static inline struct neighbour *dst_get_neighbour(struct dst_entry *dst) +{ + return rcu_dereference(dst->_neighbour); +} + +static inline struct neighbour *dst_get_neighbour_raw(struct dst_entry *dst) +{ + return rcu_dereference_raw(dst->_neighbour); +} + +static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh) +{ + rcu_assign_pointer(dst->_neighbour, neigh); +} + extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); extern const u32 dst_default_metrics[RTAX_MAX]; @@ -371,8 +386,14 @@ static inline void dst_rcu_free(struct rcu_head *head) static inline void dst_confirm(struct dst_entry *dst) { - if (dst) - neigh_confirm(dst->neighbour); + if (dst) { + struct neighbour *n; + + rcu_read_lock(); + n = dst_get_neighbour(dst); + neigh_confirm(n); + rcu_read_unlock(); + } } static inline void dst_link_failure(struct sk_buff *skb) diff --git a/net/atm/clip.c b/net/atm/clip.c index 1d4be60e1390..5889074e9718 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -364,33 +364,37 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct clip_priv *clip_priv = PRIV(dev); + struct dst_entry *dst = skb_dst(skb); struct atmarp_entry *entry; + struct neighbour *n; struct atm_vcc *vcc; int old; unsigned long flags; pr_debug("(skb %p)\n", skb); - if (!skb_dst(skb)) { + if (!dst) { pr_err("skb_dst(skb) == NULL\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; } - if (!skb_dst(skb)->neighbour) { + n = dst_get_neighbour(dst); + if (!n) { #if 0 - skb_dst(skb)->neighbour = clip_find_neighbour(skb_dst(skb), 1); - if (!skb_dst(skb)->neighbour) { + n = clip_find_neighbour(skb_dst(skb), 1); + if (!n) { dev_kfree_skb(skb); /* lost that one */ dev->stats.tx_dropped++; return 0; } + dst_set_neighbour(dst, n); #endif pr_err("NO NEIGHBOUR !\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; } - entry = NEIGH2ENTRY(skb_dst(skb)->neighbour); + entry = NEIGH2ENTRY(n); if (!entry->vccs) { if (time_after(jiffies, entry->expires)) { /* should be resolved */ @@ -407,7 +411,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, } pr_debug("neigh %p, vccs %p\n", entry, entry->vccs); ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc; - pr_debug("using neighbour %p, vcc %p\n", skb_dst(skb)->neighbour, vcc); + pr_debug("using neighbour %p, vcc %p\n", n, vcc); if (entry->vccs->encap) { void *here; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 56149ec36d7f..3dc7f5446a9d 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -343,24 +343,26 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct neighbour *neigh; struct dst_entry *dst; skb->dev = bridge_parent(skb->dev); if (!skb->dev) goto free_skb; dst = skb_dst(skb); + neigh = dst_get_neighbour(dst); if (dst->hh) { neigh_hh_bridge(dst->hh, skb); skb->dev = nf_bridge->physindev; return br_handle_frame_finish(skb); - } else if (dst->neighbour) { + } else if (neigh) { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and * protocol number. */ skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ nf_bridge->mask |= BRNF_BRIDGED_DNAT; - return dst->neighbour->output(skb); + return neigh->output(skb); } free_skb: kfree_skb(skb); diff --git a/net/core/dst.c b/net/core/dst.c index 6135f3671692..8246d47a2184 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - dst->neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); dst->hh = NULL; #ifdef CONFIG_XFRM dst->xfrm = NULL; @@ -231,7 +231,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) smp_rmb(); again: - neigh = dst->neighbour; + neigh = rcu_dereference_protected(dst->_neighbour, 1); hh = dst->hh; child = dst->child; @@ -240,7 +240,7 @@ again: hh_cache_put(hh); if (neigh) { - dst->neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); neigh_release(neigh); } @@ -367,14 +367,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { + struct neighbour *neigh; + dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = dst->dev; + rcu_read_lock(); + neigh = dst_get_neighbour(dst); + if (neigh && neigh->dev == dev) { + neigh->dev = dst->dev; dev_hold(dst->dev); dev_put(dev); } + rcu_read_unlock(); } } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 16db88707804..8c54aff0b0c1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1173,12 +1173,17 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, while (neigh->nud_state & NUD_VALID && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { - struct neighbour *n1 = neigh; + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n2, *n1 = neigh; write_unlock_bh(&neigh->lock); + + rcu_read_lock(); /* On shaper/eql skb->dst->neighbour != neigh :( */ - if (skb_dst(skb) && skb_dst(skb)->neighbour) - n1 = skb_dst(skb)->neighbour; + if (dst && (n2 = dst_get_neighbour(dst)) != NULL) + n1 = n2; n1->output(skb); + rcu_read_unlock(); + write_lock_bh(&neigh->lock); } skb_queue_purge(&neigh->arp_queue); @@ -1300,10 +1305,10 @@ EXPORT_SYMBOL(neigh_compat_output); int neigh_resolve_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh; + struct neighbour *neigh = dst_get_neighbour(dst); int rc = 0; - if (!dst || !(neigh = dst->neighbour)) + if (!dst) goto discard; __skb_pull(skb, skb_network_offset(skb)); @@ -1333,7 +1338,7 @@ out: return rc; discard: NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", - dst, dst ? dst->neighbour : NULL); + dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); @@ -1347,7 +1352,7 @@ int neigh_connected_output(struct sk_buff *skb) { int err; struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; unsigned int seq; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 602dade7e9a3..9810610d26c6 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -208,7 +208,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; char mac_addr[ETH_ALEN]; @@ -227,7 +227,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) static int dn_long_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3; unsigned char *data; @@ -274,7 +274,7 @@ static int dn_long_output(struct sk_buff *skb) static int dn_short_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; struct dn_short_packet *sp; @@ -318,7 +318,7 @@ static int dn_short_output(struct sk_buff *skb) static int dn_phase3_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; struct dn_short_packet *sp; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 74544bc6fdec..b91b60363c39 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -241,9 +241,11 @@ static int dn_dst_gc(struct dst_ops *ops) */ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) { + struct neighbour *n = dst_get_neighbour(dst); u32 min_mtu = 230; - struct dn_dev *dn = dst->neighbour ? - rcu_dereference_raw(dst->neighbour->dev->dn_ptr) : NULL; + struct dn_dev *dn; + + dn = n ? rcu_dereference_raw(n->dev->dn_ptr) : NULL; if (dn && dn->use_long == 0) min_mtu -= 6; @@ -715,7 +717,7 @@ static int dn_output(struct sk_buff *skb) int err = -EINVAL; - if ((neigh = dst->neighbour) == NULL) + if ((neigh = dst_get_neighbour(dst)) == NULL) goto error; skb->dev = dev; @@ -750,7 +752,7 @@ static int dn_forward(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); struct dn_route *rt; - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); int header_len; #ifdef CONFIG_NETFILTER struct net_device *dev = skb->dev; @@ -833,11 +835,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) } rt->rt_type = res->type; - if (dev != NULL && rt->dst.neighbour == NULL) { + if (dev != NULL && dst_get_neighbour(&rt->dst) == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); - rt->dst.neighbour = n; + dst_set_neighbour(&rt->dst, n); } if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) @@ -1144,7 +1146,7 @@ make_route: rt->rt_dst_map = fld.daddr; rt->rt_src_map = fld.saddr; - rt->dst.neighbour = neigh; + dst_set_neighbour(&rt->dst, neigh); neigh = NULL; rt->dst.lastuse = jiffies; @@ -1416,7 +1418,7 @@ make_route: rt->fld.flowidn_iif = in_dev->ifindex; rt->fld.flowidn_mark = fld.flowidn_mark; - rt->dst.neighbour = neigh; + dst_set_neighbour(&rt->dst, neigh); rt->dst.lastuse = jiffies; rt->dst.output = dn_rt_bug; switch(res.type) { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1b74d3b64371..1d5675efcfbc 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -518,26 +518,32 @@ EXPORT_SYMBOL(arp_find); /* END OF OBSOLETE FUNCTIONS */ +struct neighbour *__arp_bind_neighbour(struct dst_entry *dst, __be32 nexthop) +{ + struct net_device *dev = dst->dev; + + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + nexthop = 0; + return __neigh_lookup_errno( +#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) + dev->type == ARPHRD_ATM ? + clip_tbl_hook : +#endif + &arp_tbl, &nexthop, dev); +} + int arp_bind_neighbour(struct dst_entry *dst) { struct net_device *dev = dst->dev; - struct neighbour *n = dst->neighbour; + struct neighbour *n = dst_get_neighbour(dst); if (dev == NULL) return -EINVAL; if (n == NULL) { - __be32 nexthop = ((struct rtable *)dst)->rt_gateway; - if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) - nexthop = 0; - n = __neigh_lookup_errno( -#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) - dev->type == ARPHRD_ATM ? - clip_tbl_hook : -#endif - &arp_tbl, &nexthop, dev); + n = __arp_bind_neighbour(dst, ((struct rtable *)dst)->rt_gateway); if (IS_ERR(n)) return PTR_ERR(n); - dst->neighbour = n; + dst_set_neighbour(dst, n); } return 0; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8871067560db..d7bb94c48345 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -731,9 +731,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { + struct neighbour *neigh = dst_get_neighbour(skb_dst(skb)); const struct in6_addr *addr6; int addr_type; - struct neighbour *neigh = skb_dst(skb)->neighbour; if (neigh == NULL) goto tx_error; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0c99db4c80b1..51a3eec2c706 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -182,6 +182,8 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct rtable *rt = (struct rtable *)dst; struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); + struct neighbour *neigh; + int res; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); @@ -203,10 +205,22 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); + rcu_read_lock(); + if (dst->hh) { + int res = neigh_hh_output(dst->hh, skb); + + rcu_read_unlock(); + return res; + } else { + neigh = dst_get_neighbour(dst); + if (neigh) { + res = neigh->output(skb); + + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); + } if (net_ratelimit()) printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4845bfe02d24..65ff2e52a142 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -416,7 +416,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) "HHUptod\tSpecDst"); else { struct rtable *r = v; - int len; + struct neighbour *n; + int len, HHUptod; + + rcu_read_lock(); + n = dst_get_neighbour(&r->dst); + HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; + rcu_read_unlock(); seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", @@ -431,8 +437,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) dst_metric(&r->dst, RTAX_RTTVAR)), r->rt_key_tos, r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, - r->dst.hh ? (r->dst.hh->hh_output == - dev_queue_xmit) : 0, + HHUptod, r->rt_spec_dst, &len); seq_printf(seq, "%*s\n", 127 - len, ""); @@ -1688,23 +1693,25 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; + struct neighbour *n, *old_n; dst_confirm(&rt->dst); - neigh_release(rt->dst.neighbour); - rt->dst.neighbour = NULL; - rt->rt_gateway = peer->redirect_learned.a4; - if (arp_bind_neighbour(&rt->dst) || - !(rt->dst.neighbour->nud_state & NUD_VALID)) { - if (rt->dst.neighbour) - neigh_event_send(rt->dst.neighbour, NULL); + n = __arp_bind_neighbour(&rt->dst, rt->rt_gateway); + if (IS_ERR(n)) + return PTR_ERR(n); + old_n = xchg(&rt->dst._neighbour, n); + if (old_n) + neigh_release(old_n); + if (!n || !(n->nud_state & NUD_VALID)) { + if (n) + neigh_event_send(n, NULL); rt->rt_gateway = orig_gw; return -EAGAIN; } else { rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, - rt->dst.neighbour); + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 498b927f68be..0f335c67eb8a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, * layer address of our nexhop router */ - if (rt->rt6i_nexthop == NULL) + if (dst_get_neighbour_raw(&rt->dst) == NULL) ifa->flags &= ~IFA_F_OPTIMISTIC; ifa->idev = idev; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 4076a0b14b20..0f9b37a1c1d4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && - (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) { + (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e17596b8407a..9cbf17686a1b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -100,6 +100,8 @@ static int ip6_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + struct neighbour *neigh; + int res; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -134,10 +136,22 @@ static int ip6_finish_output2(struct sk_buff *skb) skb->len); } - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); + rcu_read_lock(); + if (dst->hh) { + res = neigh_hh_output(dst->hh, skb); + + rcu_read_unlock(); + return res; + } else { + neigh = dst_get_neighbour(dst); + if (neigh) { + res = neigh->output(skb); + + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); + } IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -385,6 +399,7 @@ int ip6_forward(struct sk_buff *skb) struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); + struct neighbour *n; u32 mtu; if (net->ipv6.devconf_all->forwarding == 0) @@ -459,11 +474,10 @@ int ip6_forward(struct sk_buff *skb) send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. */ - if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && - !skb_sec_path(skb)) { + n = dst_get_neighbour(dst); + if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; - struct neighbour *n = dst->neighbour; /* * incoming and outgoing devices are the same @@ -949,8 +963,11 @@ out: static int ip6_dst_lookup_tail(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { - int err; struct net *net = sock_net(sk); +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + struct neighbour *n; +#endif + int err; if (*dst == NULL) *dst = ip6_route_output(net, sk, fl6); @@ -976,11 +993,14 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ - if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { + rcu_read_lock(); + n = dst_get_neighbour(*dst); + if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; + rcu_read_unlock(); ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); @@ -1000,6 +1020,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, if ((err = (*dst)->error)) goto out_err_release; } + } else { + rcu_read_unlock(); } #endif diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7596f071d308..10a8d411707e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1244,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev); if (rt) - neigh = rt->rt6i_nexthop; + neigh = dst_get_neighbour(&rt->dst); if (rt && lifetime == 0) { neigh_clone(neigh); @@ -1265,7 +1265,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - neigh = rt->rt6i_nexthop; + neigh = dst_get_neighbour(&rt->dst); if (neigh == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() got default router without neighbour.\n", diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0ef1f086feb8..e70e902a5d06 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -356,7 +356,7 @@ out: #ifdef CONFIG_IPV6_ROUTER_PREF static void rt6_probe(struct rt6_info *rt) { - struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; + struct neighbour *neigh; /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -365,8 +365,10 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ + rcu_read_lock(); + neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) - return; + goto out; read_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { @@ -379,8 +381,11 @@ static void rt6_probe(struct rt6_info *rt) target = (struct in6_addr *)&neigh->primary_key; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); - } else + } else { read_unlock_bh(&neigh->lock); + } +out: + rcu_read_unlock(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -404,8 +409,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) static inline int rt6_check_neigh(struct rt6_info *rt) { - struct neighbour *neigh = rt->rt6i_nexthop; + struct neighbour *neigh; int m; + + rcu_read_lock(); + neigh = dst_get_neighbour(&rt->dst); if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -422,6 +430,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) read_unlock_bh(&neigh->lock); } else m = 0; + rcu_read_unlock(); return m; } @@ -745,8 +754,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_add dst_free(&rt->dst); return NULL; } - rt->rt6i_nexthop = neigh; - + dst_set_neighbour(&rt->dst, neigh); } return rt; @@ -760,7 +768,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, const struct in6_a rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; rt->dst.flags |= DST_HOST; - rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); + dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); } return rt; } @@ -794,7 +802,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -1058,7 +1066,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, } rt->rt6i_idev = idev; - rt->rt6i_nexthop = neigh; + dst_set_neighbour(&rt->dst, neigh); atomic_set(&rt->dst.__refcnt, 1); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); rt->dst.output = ip6_output; @@ -1338,12 +1346,12 @@ int ip6_route_add(struct fib6_config *cfg) rt->rt6i_prefsrc.plen = 0; if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { - rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); - if (IS_ERR(rt->rt6i_nexthop)) { - err = PTR_ERR(rt->rt6i_nexthop); - rt->rt6i_nexthop = NULL; + struct neighbour *neigh = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); + if (IS_ERR(neigh)) { + err = PTR_ERR(neigh); goto out; } + dst_set_neighbour(&rt->dst, neigh); } rt->rt6i_flags = cfg->fc_flags; @@ -1574,7 +1582,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == rt->dst.neighbour) + if (neigh == dst_get_neighbour_raw(&rt->dst)) goto out; nrt = ip6_rt_copy(rt); @@ -1590,7 +1598,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, nrt->dst.flags |= DST_HOST; ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); - nrt->rt6i_nexthop = neigh_clone(neigh); + dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); if (ip6_ins_rt(nrt)) goto out; @@ -1670,7 +1678,7 @@ again: 1. It is connected route. Action: COW 2. It is gatewayed route or NONEXTHOP route. Action: clone it. */ - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, daddr, saddr); else nrt = rt6_alloc_clone(rt, daddr); @@ -2035,7 +2043,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, return ERR_CAST(neigh); } - rt->rt6i_nexthop = neigh; + dst_set_neighbour(&rt->dst, neigh); ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; @@ -2312,6 +2320,7 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; + struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2400,8 +2409,11 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - if (rt->dst.neighbour) - NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key); + rcu_read_lock(); + n = dst_get_neighbour(&rt->dst); + if (n) + NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); + rcu_read_unlock(); if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); @@ -2585,6 +2597,7 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; + struct neighbour *n; seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); @@ -2593,12 +2606,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - - if (rt->rt6i_nexthop) { - seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); + rcu_read_lock(); + n = dst_get_neighbour(&rt->dst); + if (n) { + seq_printf(m, "%pi6", n->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } + rcu_read_unlock(); seq_printf(m, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->dst.__use, rt->rt6i_flags, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 38490d5c7e13..f56acd096598 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -679,7 +679,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct neighbour *neigh = NULL; if (skb_dst(skb)) - neigh = skb_dst(skb)->neighbour; + neigh = dst_get_neighbour(skb_dst(skb)); if (neigh == NULL) { if (net_ratelimit()) @@ -704,7 +704,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct neighbour *neigh = NULL; if (skb_dst(skb)) - neigh = skb_dst(skb)->neighbour; + neigh = dst_get_neighbour(skb_dst(skb)); if (neigh == NULL) { if (net_ratelimit()) diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 45cd30098e34..4f4c52c0eeb3 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -225,11 +225,11 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int -__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) +__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, + struct net_device *dev, struct netdev_queue *txq, + struct neighbour *mn) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); - struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); - struct neighbour *mn = skb_dst(skb)->neighbour; + struct teql_sched_data *q = qdisc_priv(txq->qdisc); struct neighbour *n = q->ncache; if (mn->tbl == NULL) @@ -262,17 +262,26 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * } static inline int teql_resolve(struct sk_buff *skb, - struct sk_buff *skb_res, struct net_device *dev) + struct sk_buff *skb_res, + struct net_device *dev, + struct netdev_queue *txq) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); + struct dst_entry *dst = skb_dst(skb); + struct neighbour *mn; + int res; + if (txq->qdisc == &noop_qdisc) return -ENODEV; - if (dev->header_ops == NULL || - skb_dst(skb) == NULL || - skb_dst(skb)->neighbour == NULL) + if (!dev->header_ops || !dst) return 0; - return __teql_resolve(skb, skb_res, dev); + + rcu_read_lock(); + mn = dst_get_neighbour(dst); + res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0; + rcu_read_unlock(); + + return res; } static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) @@ -307,7 +316,7 @@ restart: continue; } - switch (teql_resolve(skb, skb_res, slave)) { + switch (teql_resolve(skb, skb_res, slave, slave_txq)) { case 0: if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5ce74a385525..7803eb6af414 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1497,7 +1497,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto free_dst; /* Copy neighbour for reachability confirmation */ - dst0->neighbour = neigh_clone(dst->neighbour); + dst_set_neighbour(dst0, neigh_clone(dst_get_neighbour(dst))); xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); xfrm_init_pmtu(dst_prev); |