From 8a533666d1591cf4ea596c6bd710e2fe682cb56a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Feb 2012 16:13:19 -0500 Subject: net: fix NULL dereferences in check_peer_redir() [ Upstream commit d3aaeb38c40e5a6c08dd31a1b64da65c4352be36, along with dependent backports of commits: 69cce1d1404968f78b177a0314f5822d5afdbbfb 9de79c127cccecb11ae6a21ab1499e87aa222880 218fa90f072e4aeff9003d57e390857f4f35513e 580da35a31f91a594f3090b7a2c39b85cb051a12 f7e57044eeb1841847c24aa06766c8290c202583 e049f28883126c689cf95859480d9ee4ab23b7fa ] Gergely Kalman reported crashes in check_peer_redir(). It appears commit f39925dbde778 (ipv4: Cache learned redirect information in inetpeer.) added a race, leading to possible NULL ptr dereference. Since we can now change dst neighbour, we should make sure a reader can safely use a neighbour. Add RCU protection to dst neighbour, and make sure check_peer_redir() can be called safely by different cpus in parallel. As neighbours are already freed after one RCU grace period, this patch should not add typical RCU penalty (cache cold effects) Many thanks to Gergely for providing a pretty report pointing to the bug. Reported-by: Gergely Kalman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/addr.c | 16 ++++--- drivers/infiniband/hw/cxgb3/iwch_cm.c | 16 +++++-- drivers/infiniband/hw/cxgb4/cm.c | 46 +++++++++++--------- drivers/infiniband/hw/mlx4/qp.c | 2 +- drivers/infiniband/hw/nes/nes_cm.c | 8 ++-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 59 +++++++++++++++++--------- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 24 +++++++---- 7 files changed, 108 insertions(+), 63 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 8e21d457b899..f2a84c6f8543 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -215,7 +215,9 @@ static int addr4_resolve(struct sockaddr_in *src_in, neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->dst.dev); if (!neigh || !(neigh->nud_state & NUD_VALID)) { - neigh_event_send(rt->dst.neighbour, NULL); + rcu_read_lock(); + neigh_event_send(dst_get_neighbour(&rt->dst), NULL); + rcu_read_unlock(); ret = -ENODATA; if (neigh) goto release; @@ -273,14 +275,16 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, goto put; } - neigh = dst->neighbour; + rcu_read_lock(); + neigh = dst_get_neighbour(dst); if (!neigh || !(neigh->nud_state & NUD_VALID)) { - neigh_event_send(dst->neighbour, NULL); + if (neigh) + neigh_event_send(neigh, NULL); ret = -ENODATA; - goto put; + } else { + ret = rdma_copy_addr(addr, dst->dev, neigh->ha); } - - ret = rdma_copy_addr(addr, dst->dev, neigh->ha); + rcu_read_unlock(); put: dst_release(dst); return ret; diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 2332dc22aa04..e55ce7a428be 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1328,6 +1328,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct iwch_ep *child_ep, *parent_ep = ctx; struct cpl_pass_accept_req *req = cplhdr(skb); unsigned int hwtid = GET_TID(req); + struct neighbour *neigh; struct dst_entry *dst; struct l2t_entry *l2t; struct rtable *rt; @@ -1364,7 +1365,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) goto reject; } dst = &rt->dst; - l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev); + rcu_read_lock(); + neigh = dst_get_neighbour(dst); + l2t = t3_l2t_get(tdev, neigh, neigh->dev); + rcu_read_unlock(); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1874,10 +1878,11 @@ static int is_loopback_dst(struct iw_cm_id *cm_id) int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { - int err = 0; struct iwch_dev *h = to_iwch_dev(cm_id->device); + struct neighbour *neigh; struct iwch_ep *ep; struct rtable *rt; + int err = 0; if (is_loopback_dst(cm_id)) { err = -ENOSYS; @@ -1933,9 +1938,12 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } ep->dst = &rt->dst; + rcu_read_lock(); + neigh = dst_get_neighbour(ep->dst); + /* get a l2t entry */ - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour, - ep->dst->neighbour->dev); + ep->l2t = t3_l2t_get(ep->com.tdev, neigh, neigh->dev); + rcu_read_unlock(); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 31fb44085c9b..daa93e942e16 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1325,6 +1325,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid)); struct tid_info *t = dev->rdev.lldi.tids; unsigned int hwtid = GET_TID(req); + struct neighbour *neigh; struct dst_entry *dst; struct l2t_entry *l2t; struct rtable *rt; @@ -1357,11 +1358,12 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } dst = &rt->dst; - if (dst->neighbour->dev->flags & IFF_LOOPBACK) { + rcu_read_lock(); + neigh = dst_get_neighbour(dst); + if (neigh->dev->flags & IFF_LOOPBACK) { pdev = ip_dev_find(&init_net, peer_ip); BUG_ON(!pdev); - l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, dst->neighbour, - pdev, 0); + l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, pdev, 0); mtu = pdev->mtu; tx_chan = cxgb4_port_chan(pdev); smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; @@ -1372,18 +1374,18 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) rss_qid = dev->rdev.lldi.rxq_ids[cxgb4_port_idx(pdev) * step]; dev_put(pdev); } else { - l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, dst->neighbour, - dst->neighbour->dev, 0); + l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, neigh->dev, 0); mtu = dst_mtu(dst); - tx_chan = cxgb4_port_chan(dst->neighbour->dev); - smac_idx = (cxgb4_port_viid(dst->neighbour->dev) & 0x7F) << 1; + tx_chan = cxgb4_port_chan(neigh->dev); + smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1; step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan; - txq_idx = cxgb4_port_idx(dst->neighbour->dev) * step; - ctrlq_idx = cxgb4_port_idx(dst->neighbour->dev); + txq_idx = cxgb4_port_idx(neigh->dev) * step; + ctrlq_idx = cxgb4_port_idx(neigh->dev); step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(dst->neighbour->dev) * step]; + cxgb4_port_idx(neigh->dev) * step]; } + rcu_read_unlock(); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1847,6 +1849,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct c4iw_ep *ep; struct rtable *rt; struct net_device *pdev; + struct neighbour *neigh; int step; if ((conn_param->ord > c4iw_max_read_depth) || @@ -1908,14 +1911,16 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } ep->dst = &rt->dst; + rcu_read_lock(); + neigh = dst_get_neighbour(ep->dst); + /* get a l2t entry */ - if (ep->dst->neighbour->dev->flags & IFF_LOOPBACK) { + if (neigh->dev->flags & IFF_LOOPBACK) { PDBG("%s LOOPBACK\n", __func__); pdev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr); ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, - ep->dst->neighbour, - pdev, 0); + neigh, pdev, 0); ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; @@ -1930,21 +1935,20 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) dev_put(pdev); } else { ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, - ep->dst->neighbour, - ep->dst->neighbour->dev, 0); + neigh, neigh->dev, 0); ep->mtu = dst_mtu(ep->dst); - ep->tx_chan = cxgb4_port_chan(ep->dst->neighbour->dev); - ep->smac_idx = (cxgb4_port_viid(ep->dst->neighbour->dev) & - 0x7F) << 1; + ep->tx_chan = cxgb4_port_chan(neigh->dev); + ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1; step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan; - ep->txq_idx = cxgb4_port_idx(ep->dst->neighbour->dev) * step; - ep->ctrlq_idx = cxgb4_port_idx(ep->dst->neighbour->dev); + ep->txq_idx = cxgb4_port_idx(neigh->dev) * step; + ep->ctrlq_idx = cxgb4_port_idx(neigh->dev); step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan; ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(ep->dst->neighbour->dev) * step]; + cxgb4_port_idx(neigh->dev) * step]; } + rcu_read_unlock(); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2001f20a4361..23c04ff6519b 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1301,7 +1301,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, int is_eth; int is_vlan = 0; int is_grh; - u16 vlan; + u16 vlan = 0; send_size = 0; for (i = 0; i < wr->num_sge; ++i) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index e74cdf9ef471..a1f74f6381ba 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1150,9 +1150,11 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi neigh_release(neigh); } - if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID))) - neigh_event_send(rt->dst.neighbour, NULL); - + if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID))) { + rcu_read_lock(); + neigh_event_send(dst_get_neighbour(&rt->dst), NULL); + rcu_read_unlock(); + } ip_rt_put(rt); return rc; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 86addca9ddf6..a98c414978ec 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -555,14 +555,17 @@ static int path_rec_start(struct net_device *dev, return 0; } +/* called with rcu_read_lock */ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; struct ipoib_neigh *neigh; + struct neighbour *n; unsigned long flags; - neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour, skb->dev); + n = dst_get_neighbour(skb_dst(skb)); + neigh = ipoib_neigh_alloc(n, skb->dev); if (!neigh) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -571,9 +574,9 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, skb_dst(skb)->neighbour->ha + 4); + path = __path_find(dev, n->ha + 4); if (!path) { - path = path_rec_create(dev, skb_dst(skb)->neighbour->ha + 4); + path = path_rec_create(dev, n->ha + 4); if (!path) goto err_path; @@ -607,7 +610,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) } } else { spin_unlock_irqrestore(&priv->lock, flags); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha)); + ipoib_send(dev, skb, path->ah, IPOIB_QPN(n->ha)); return; } } else { @@ -634,20 +637,24 @@ err_drop: spin_unlock_irqrestore(&priv->lock, flags); } +/* called with rcu_read_lock */ static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(skb->dev); + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n; /* Look up path record for unicasts */ - if (skb_dst(skb)->neighbour->ha[4] != 0xff) { + n = dst_get_neighbour(dst); + if (n->ha[4] != 0xff) { neigh_add_path(skb, dev); return; } /* Add in the P_Key for multicasts */ - skb_dst(skb)->neighbour->ha[8] = (priv->pkey >> 8) & 0xff; - skb_dst(skb)->neighbour->ha[9] = priv->pkey & 0xff; - ipoib_mcast_send(dev, skb_dst(skb)->neighbour->ha + 4, skb); + n->ha[8] = (priv->pkey >> 8) & 0xff; + n->ha[9] = priv->pkey & 0xff; + ipoib_mcast_send(dev, n->ha + 4, skb); } static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, @@ -712,18 +719,23 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh; + struct neighbour *n = NULL; unsigned long flags; - if (likely(skb_dst(skb) && skb_dst(skb)->neighbour)) { - if (unlikely(!*to_ipoib_neigh(skb_dst(skb)->neighbour))) { + rcu_read_lock(); + if (likely(skb_dst(skb))) + n = dst_get_neighbour(skb_dst(skb)); + + if (likely(n)) { + if (unlikely(!*to_ipoib_neigh(n))) { ipoib_path_lookup(skb, dev); - return NETDEV_TX_OK; + goto unlock; } - neigh = *to_ipoib_neigh(skb_dst(skb)->neighbour); + neigh = *to_ipoib_neigh(n); if (unlikely((memcmp(&neigh->dgid.raw, - skb_dst(skb)->neighbour->ha + 4, + n->ha + 4, sizeof(union ib_gid))) || (neigh->dev != dev))) { spin_lock_irqsave(&priv->lock, flags); @@ -740,17 +752,17 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) ipoib_neigh_free(dev, neigh); spin_unlock_irqrestore(&priv->lock, flags); ipoib_path_lookup(skb, dev); - return NETDEV_TX_OK; + goto unlock; } if (ipoib_cm_get(neigh)) { if (ipoib_cm_up(neigh)) { ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); - return NETDEV_TX_OK; + goto unlock; } } else if (neigh->ah) { - ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha)); - return NETDEV_TX_OK; + ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(n->ha)); + goto unlock; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { @@ -784,13 +796,14 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) phdr->hwaddr + 4); dev_kfree_skb_any(skb); ++dev->stats.tx_dropped; - return NETDEV_TX_OK; + goto unlock; } unicast_arp_send(skb, dev, phdr); } } - +unlock: + rcu_read_unlock(); return NETDEV_TX_OK; } @@ -812,6 +825,8 @@ static int ipoib_hard_header(struct sk_buff *skb, const void *daddr, const void *saddr, unsigned len) { struct ipoib_header *header; + struct dst_entry *dst; + struct neighbour *n; header = (struct ipoib_header *) skb_push(skb, sizeof *header); @@ -823,7 +838,11 @@ static int ipoib_hard_header(struct sk_buff *skb, * destination address onto the front of the skb so we can * figure out where to send the packet later. */ - if ((!skb_dst(skb) || !skb_dst(skb)->neighbour) && daddr) { + dst = skb_dst(skb); + n = NULL; + if (dst) + n = dst_get_neighbour_raw(dst); + if ((!dst || !n) && daddr) { struct ipoib_pseudoheader *phdr = (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 3871ac663554..a8d2a891b840 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -258,11 +258,15 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n = NULL; + netif_tx_unlock_bh(dev); skb->dev = dev; - - if (!skb_dst(skb) || !skb_dst(skb)->neighbour) { + if (dst) + n = dst_get_neighbour_raw(dst); + if (!dst || !n) { /* put pseudoheader back on for next time */ skb_push(skb, sizeof (struct ipoib_pseudoheader)); } @@ -715,11 +719,15 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) out: if (mcast && mcast->ah) { - if (skb_dst(skb) && - skb_dst(skb)->neighbour && - !*to_ipoib_neigh(skb_dst(skb)->neighbour)) { - struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour, - skb->dev); + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n = NULL; + + rcu_read_lock(); + if (dst) + n = dst_get_neighbour(dst); + if (n && !*to_ipoib_neigh(n)) { + struct ipoib_neigh *neigh = ipoib_neigh_alloc(n, + skb->dev); if (neigh) { kref_get(&mcast->ah->ref); @@ -727,7 +735,7 @@ out: list_add_tail(&neigh->list, &mcast->neigh_list); } } - + rcu_read_unlock(); spin_unlock_irqrestore(&priv->lock, flags); ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); return; -- cgit v1.2.3