From ed0f160ad674407adb3aba499444f71c83289c63 Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Wed, 26 May 2010 00:38:56 -0700
Subject: ipmr: off by one in __ipmr_fill_mroute()

This fixes a smatch warning:
net/ipv4/ipmr.c +1917 __ipmr_fill_mroute(12) error: buffer overflow '(mrt)->vif_table' 32 <= 32

The ipv6 version had the same issue.

Signed-off-by: Dan Carpenter
Signed-off-by: David S. Miller
---
 net/ipv4/ipmr.c  | 2 +-
 net/ipv6/ip6mr.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 45889103b3e2..856123fe32f9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1911,7 +1911,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 	struct rtattr *mp_head;
 
 	/* If cache is unresolved, don't try to parse IIF and OIF */
-	if (c->mfc_parent > MAXVIFS)
+	if (c->mfc_parent >= MAXVIFS)
 		return -ENOENT;
 
 	if (VIF_EXISTS(mrt, c->mfc_parent))
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index bd9e7d3e9c8e..073071f2b75b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2017,7 +2017,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 	struct rtattr *mp_head;
 
 	/* If cache is unresolved, don't try to parse IIF and OIF */
-	if (c->mf6c_parent > MAXMIFS)
+	if (c->mf6c_parent >= MAXMIFS)
 		return -ENOENT;
 
 	if (MIF_EXISTS(mrt, c->mf6c_parent))
--
cgit v1.2.3

From a56635a56f2afb3d22d9ce07e8f8d69537416b2d Mon Sep 17 00:00:00 2001
From: Julia Lawall
Date: Wed, 26 May 2010 05:56:48 +0000
Subject: net/iucv: Add missing spin_unlock

Add a spin_unlock missing on the error path. There seems to be no reason
why the lock should continue to be held if the kzalloc fails.

The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

//
@@
expression E1;
@@
* spin_lock(E1,...);
  <+... when != E1
  if (...) {
    ... when != E1
*   return ...;
  }
  ...+>
* spin_unlock(E1,...);
//

Signed-off-by: Julia Lawall
Signed-off-by: David S. Miller
---
 net/iucv/af_iucv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c8b4599a752e..9637e45744fa 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1619,7 +1619,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
 
 save_message:
 	save_msg = kzalloc(sizeof(struct sock_msg_q), GFP_ATOMIC | GFP_DMA);
 	if (!save_msg)
-		return;
+		goto out_unlock;
 	save_msg->path = path;
 	save_msg->msg = *msg;
--
cgit v1.2.3
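
An aside on the ipmr/ip6mr off-by-one fixed in the first patch above: a table
with MAXVIFS (or MAXMIFS) entries only has valid indices 0 .. MAXVIFS-1, so the
guard must reject every value >= MAXVIFS; the old '>' comparison let an index
equal to MAXVIFS through and read one element past the end of vif_table. The
following stand-alone sketch shows the corrected check; the table layout, the
MAXVIFS value and the helper name are illustrative, not the kernel definitions.

#include <stdio.h>

#define MAXVIFS 32                               /* illustrative table size */

struct vif_device { int ifindex; };
static struct vif_device vif_table[MAXVIFS];     /* valid indices: 0 .. MAXVIFS-1 */

static int parent_ifindex(unsigned short parent)
{
	/* 'parent > MAXVIFS' would let parent == MAXVIFS through and read one
	 * element past the end of vif_table; '>=' rejects it. */
	if (parent >= MAXVIFS)
		return -1;                       /* unresolved / out of range */
	return vif_table[parent].ifindex;
}

int main(void)
{
	printf("%d\n", parent_ifindex(MAXVIFS)); /* -1 instead of an out-of-bounds read */
	return 0;
}
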
From 8a74ad60a546b13bd1096b2a61a7a5c6fd9ae17c Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 26 May 2010 19:20:18 +0000
Subject: net: fix lock_sock_bh/unlock_sock_bh

This new sock lock primitive was introduced to speed up some user-context
socket manipulation. But it is unsafe if one thread uses the regular
lock_sock()/release_sock() while another uses lock_sock_bh()/unlock_sock_bh().

This patch changes lock_sock_bh to be careful about the 'owned' state:
if 'owned' is found to be set, we must take the slow path.
lock_sock_bh() now returns a boolean to say whether the slow path was taken,
and this boolean is used at unlock_sock_bh time to call the appropriate
unlock function.

After this change, BH may be either disabled or enabled during the
lock_sock_bh/unlock_sock_bh protected section. Since the _bh suffix would
then be misleading, the functions are renamed to
lock_sock_fast()/unlock_sock_fast().

Reported-by: Anton Blanchard
Signed-off-by: Eric Dumazet
Tested-by: Anton Blanchard
Signed-off-by: David S. Miller
---
 net/core/datagram.c |  6 ++++--
 net/core/sock.c     | 33 +++++++++++++++++++++++++++++++++
 net/ipv4/udp.c      | 14 ++++++++------
 net/ipv6/udp.c      |  5 +++--
 4 files changed, 48 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index e0097531417a..f5b6f43a4c2e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -229,15 +229,17 @@ EXPORT_SYMBOL(skb_free_datagram);
 
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 {
+	bool slow;
+
 	if (likely(atomic_read(&skb->users) == 1))
 		smp_rmb();
 	else if (likely(!atomic_dec_and_test(&skb->users)))
 		return;
 
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	skb_orphan(skb);
 	sk_mem_reclaim_partial(sk);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	/* skb is now orphaned, can be freed outside of locked section */
 	__kfree_skb(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 37fe9b6adade..2cf7f9f7e775 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2007,6 +2007,39 @@ void release_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(release_sock);
 
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small section, where process wont block
+ * return false if fast path is taken
+ *   sk_lock.slock locked, owned = 0, BH disabled
+ * return true if slow path is taken
+ *   sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+bool lock_sock_fast(struct sock *sk)
+{
+	might_sleep();
+	spin_lock_bh(&sk->sk_lock.slock);
+
+	if (!sk->sk_lock.owned)
+		/*
+		 * Note : We must disable BH
+		 */
+		return false;
+
+	__lock_sock(sk);
+	sk->sk_lock.owned = 1;
+	spin_unlock(&sk->sk_lock.slock);
+	/*
+	 * The sk_lock has mutex_lock() semantics here:
+	 */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+	local_bh_enable();
+	return true;
+}
+EXPORT_SYMBOL(lock_sock_fast);
+
 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
 	struct timeval tv;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9de6a698f91d..b9d0d409516f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1063,10 +1063,11 @@ static unsigned int first_packet_length(struct sock *sk)
 	spin_unlock_bh(&rcvq->lock);
 
 	if (!skb_queue_empty(&list_kill)) {
-		lock_sock_bh(sk);
+		bool slow = lock_sock_fast(sk);
+
 		__skb_queue_purge(&list_kill);
 		sk_mem_reclaim_partial(sk);
-		unlock_sock_bh(sk);
+		unlock_sock_fast(sk, slow);
 	}
 	return res;
 }
@@ -1123,6 +1124,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int peeked;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
+	bool slow;
 
 	/*
 	 *	Check any passed addresses
@@ -1197,10 +1199,10 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	if (!skb_kill_datagram(sk, skb, flags))
 		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	if (noblock)
 		return -EAGAIN;
@@ -1625,9 +1627,9 @@ int udp_rcv(struct sk_buff *skb)
 
 void udp_destroy_sock(struct sock *sk)
 {
-	lock_sock_bh(sk);
+	bool slow = lock_sock_fast(sk);
 	udp_flush_pending_frames(sk);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 }
 
 /*
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3d7a2c0b836a..87be58673b55 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -328,6 +328,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 	int is_udp4;
+	bool slow;
 
 	if (addr_len)
 		*addr_len=sizeof(struct sockaddr_in6);
@@ -424,7 +425,7 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	if (!skb_kill_datagram(sk, skb, flags)) {
 		if (is_udp4)
 			UDP_INC_STATS_USER(sock_net(sk),
@@ -433,7 +434,7 @@ csum_copy_err:
 			UDP6_INC_STATS_USER(sock_net(sk),
 					UDP_MIB_INERRORS, is_udplite);
 	}
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	if (flags & MSG_DONTWAIT)
 		return -EAGAIN;
--
cgit v1.2.3
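
An aside on the locking pattern introduced above: the caller of lock_sock_fast()
no longer knows statically which lock protects its critical section, so the lock
helper reports which path it took and the unlock helper undoes exactly that.
Below is a minimal user-space sketch of this "remember the path taken" pairing,
using pthreads. It is only an analogy with illustrative names: it omits BH
disabling, the lockdep annotation, the real __lock_sock() wait loop, and the
regular lock_sock()/release_sock() side that sets 'owned' under the spinlock.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t slock = PTHREAD_MUTEX_INITIALIZER;      /* cheap lock, plays sk_lock.slock */
static pthread_mutex_t owner_lock = PTHREAD_MUTEX_INITIALIZER; /* plays the "socket owned" state */
static bool owned;   /* set, under slock, by the regular lock path (not shown) */

/* Returns false on the fast path (slock stays held, the critical section runs
 * under it) and true on the slow path (slock dropped, owner_lock held). */
static bool lock_fast(void)
{
	pthread_mutex_lock(&slock);
	if (!owned)
		return false;                    /* fast path */

	pthread_mutex_unlock(&slock);
	pthread_mutex_lock(&owner_lock);         /* slow path: wait for the owner */
	return true;
}

static void unlock_fast(bool slow)
{
	if (slow)
		pthread_mutex_unlock(&owner_lock);
	else
		pthread_mutex_unlock(&slock);
}

int main(void)
{
	bool slow = lock_fast();
	/* ... short critical section on shared socket state ... */
	unlock_fast(slow);
	printf("took the %s path\n", slow ? "slow" : "fast");
	return 0;
}
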
From a47311380e094bb201be8a818370c73c3f52122c Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 27 May 2010 16:09:39 -0700
Subject: net: fix __neigh_event_send()

commit 7fee226ad23 (net: add a noref bit on skb dst) missed one spot
where an skb is enqueued, with a possibly not refcounted dst entry.

__neigh_event_send() inserts skb into arp_queue, so we must make sure
dst entry is refcounted, or dst entry can be freed by garbage collector
after caller exits from rcu protected section.

Reported-by: Ingo Molnar
Tested-by: Ingo Molnar
Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/neighbour.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bff37908bd55..6ba1c0eece03 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -934,6 +934,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 				kfree_skb(buff);
 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
 			}
+			skb_dst_force(skb);
 			__skb_queue_tail(&neigh->arp_queue, skb);
 		}
 		rc = 1;
--
cgit v1.2.3

From 0aa68271510ae2b221d4b60892103837be63afe4 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 27 May 2010 16:14:30 -0700
Subject: ipv6: Add GSO support on forwarding path

Currently we disallow GSO packets on the IPv6 forward path.
This patch fixes this.

Note that I discovered that our existing GSO MTU checks (e.g., IPv4
forwarding) are buggy in that they skip the check altogether, when
they really should be checking gso_size + header instead.

I have also been lazy here in that I haven't bothered to segment
the GSO packet by hand before generating an ICMP message.  Someone
should add that to be 100% correct.

Reported-by: Ralf Baechle
Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 net/ipv6/ip6_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index cd963f64e27c..89425af0684c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -507,7 +507,7 @@ int ip6_forward(struct sk_buff *skb)
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 
-	if (skb->len > mtu) {
+	if (skb->len > mtu && !skb_is_gso(skb)) {
 		/* Again, force OUTPUT device used as source address */
 		skb->dev = dst->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
--
cgit v1.2.3
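
An aside on the GSO remark in the changelog above: a GSO skb can legitimately be
much longer than the link MTU because it is segmented later, so an
aggregate-length comparison misclassifies it; per the changelog, a fully correct
check would compare the size of each eventual segment (gso_size plus headers)
against the MTU. The following stand-alone sketch shows that arithmetic; the
field names and values are illustrative, not the kernel's skb/shinfo layout.

#include <stdbool.h>
#include <stdio.h>

struct pkt {
	unsigned int len;        /* total length of the aggregated packet */
	unsigned int gso_size;   /* payload bytes per segment; 0 if not GSO */
	unsigned int hdr_len;    /* network + transport header bytes */
};

static bool exceeds_mtu(const struct pkt *p, unsigned int mtu)
{
	if (p->gso_size)                          /* GSO: check the per-segment size */
		return p->gso_size + p->hdr_len > mtu;
	return p->len > mtu;                      /* plain packet: check as-is */
}

int main(void)
{
	struct pkt gso   = { .len = 64000, .gso_size = 1440, .hdr_len = 60 };
	struct pkt plain = { .len = 1600,  .gso_size = 0,    .hdr_len = 60 };

	printf("gso exceeds 1500: %d\n", exceeds_mtu(&gso, 1500));     /* 0: fits per segment */
	printf("plain exceeds 1500: %d\n", exceeds_mtu(&plain, 1500)); /* 1: too big */
	return 0;
}
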
From 50636af715ac1ceb1872bd29a4bdcc68975c3263 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 28 May 2010 03:41:17 -0700
Subject: xt_tee: use skb_dst_drop()

After commit 7fee226a (net: add a noref bit on skb dst), it is wrong to
use dst_release(skb_dst(skb)), since we could decrement a refcount while
the skb dst was not refcounted.

We should use skb_dst_drop(skb) instead.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/netfilter/xt_TEE.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index d7920d9f49e9..859d9fd429c8 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -76,7 +76,7 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 	if (ip_route_output_key(net, &rt, &fl) != 0)
 		return false;
 
-	dst_release(skb_dst(skb));
+	skb_dst_drop(skb);
 	skb_dst_set(skb, &rt->u.dst);
 	skb->dev      = rt->u.dst.dev;
 	skb->protocol = htons(ETH_P_IP);
@@ -157,7 +157,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 	if (dst == NULL)
 		return false;
 
-	dst_release(skb_dst(skb));
+	skb_dst_drop(skb);
 	skb_dst_set(skb, dst);
 	skb->dev      = dst->dev;
 	skb->protocol = htons(ETH_P_IPV6);
--
cgit v1.2.3

From 8ca9418350eccd5dd2659931807c1901224dd638 Mon Sep 17 00:00:00 2001
From: Scott Feldman
Date: Fri, 28 May 2010 03:42:18 -0700
Subject: netlink: bug fix: don't overrun skbs on vf_port dump

Noticed by Patrick McHardy: the code was continuing to fill the skb after
a nla_put_failure, ignoring the size calculated by the upper layer.  Now,
return -EMSGSIZE on any overrun, but also allow the netdev to fail
ndo_get_vf_port with an error other than -EMSGSIZE, thus unwinding the
nest.

Signed-off-by: Scott Feldman
Signed-off-by: David S. Miller
---
 net/core/rtnetlink.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7ab86f3a1ea4..7331bb2f6b9c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -722,14 +722,13 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
 
 	for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
 		vf_port = nla_nest_start(skb, IFLA_VF_PORT);
-		if (!vf_port) {
-			nla_nest_cancel(skb, vf_ports);
-			return -EMSGSIZE;
-		}
+		if (!vf_port)
+			goto nla_put_failure;
 		NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
 		err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
+		if (err == -EMSGSIZE)
+			goto nla_put_failure;
 		if (err) {
-nla_put_failure:
 			nla_nest_cancel(skb, vf_port);
 			continue;
 		}
@@ -739,6 +738,10 @@ nla_put_failure:
 	nla_nest_end(skb, vf_ports);
 
 	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, vf_ports);
+	return -EMSGSIZE;
 }
 
 static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
@@ -753,7 +756,7 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
 	err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
 	if (err) {
 		nla_nest_cancel(skb, port_self);
-		return err;
+		return (err == -EMSGSIZE) ? err : 0;
 	}
 
 	nla_nest_end(skb, port_self);
--
cgit v1.2.3
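
An aside on the dump unwinding above: the rule the patch restores is that a fill
function must stop appending as soon as an attribute does not fit, cancel the
partially written nest, and return -EMSGSIZE so the caller can retry with a
larger buffer. The following stand-alone sketch shows that pattern over a plain
byte buffer; the helper names only mimic nla_nest_start()/nla_nest_cancel() and
are not the kernel netlink API.

#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct msg {
	unsigned char buf[32];
	size_t len;
};

static size_t nest_start(struct msg *m)              /* mimics nla_nest_start() */
{
	return m->len;                                /* mark to roll back to */
}

static void nest_cancel(struct msg *m, size_t mark)  /* mimics nla_nest_cancel() */
{
	m->len = mark;
}

static int put_bytes(struct msg *m, const void *p, size_t n)
{
	if (m->len + n > sizeof(m->buf))
		return -EMSGSIZE;                     /* never keep filling past the end */
	memcpy(m->buf + m->len, p, n);
	m->len += n;
	return 0;
}

static int fill_ports(struct msg *m, int nports)
{
	for (int vf = 0; vf < nports; vf++) {
		size_t mark = nest_start(m);
		char attr[16] = { 0 };                /* stands in for one VF port nest */

		if (put_bytes(m, attr, sizeof(attr)) < 0) {
			nest_cancel(m, mark);         /* unwind the partial nest... */
			return -EMSGSIZE;             /* ...and ask the caller to grow the buffer */
		}
	}
	return 0;
}

int main(void)
{
	struct msg m = { .len = 0 };

	printf("fill_ports: %d\n", fill_ports(&m, 4)); /* fails with -EMSGSIZE on the 3rd port */
	return 0;
}
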
From 045de01a174d9f0734f657eb4b3313d89b4fd5ad Mon Sep 17 00:00:00 2001
From: Scott Feldman
Date: Fri, 28 May 2010 03:42:43 -0700
Subject: netlink: bug fix: wrong size was calculated for vfinfo list blob

The wrong size was being calculated for vfinfo.  In one case, it was
over-calculating by using nlmsg_total_size on the attrs; in another case,
it was under-calculating by assuming the ifla_vf_* structs are packed
together, but each struct is its own attr with a header (and padding).

Signed-off-by: Scott Feldman
Signed-off-by: David S. Miller
---
 net/core/rtnetlink.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7331bb2f6b9c..1a2af24e9e3d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -650,11 +650,12 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev)
 	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
 
 		int num_vfs = dev_num_vf(dev->dev.parent);
-		size_t size = nlmsg_total_size(sizeof(struct nlattr));
-		size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
-		size += num_vfs * (sizeof(struct ifla_vf_mac) +
-				   sizeof(struct ifla_vf_vlan) +
-				   sizeof(struct ifla_vf_tx_rate));
+		size_t size = nla_total_size(sizeof(struct nlattr));
+		size += nla_total_size(num_vfs * sizeof(struct nlattr));
+		size += num_vfs *
+			(nla_total_size(sizeof(struct ifla_vf_mac)) +
+			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
+			 nla_total_size(sizeof(struct ifla_vf_tx_rate)));
 		return size;
 	} else
 		return 0;
--
cgit v1.2.3
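
An aside on the size accounting above: every netlink attribute carries its own
header and is padded to attribute alignment, so reserving room for N per-VF
structs must sum the per-attribute total sizes rather than the raw struct sizes.
The following stand-alone sketch shows the difference; the 4-byte header and
4-byte alignment mirror netlink's struct nlattr, but the payload sizes are
example values, not the actual sizeof() of the ifla_vf_* structs.

#include <stdio.h>

#define HDR_LEN   4u                     /* size of a struct nlattr header */
#define ALIGN4(x) (((x) + 3u) & ~3u)     /* netlink attribute alignment */

static unsigned int total_size(unsigned int payload)  /* mimics nla_total_size() */
{
	return ALIGN4(HDR_LEN + payload);
}

int main(void)
{
	unsigned int num_vfs = 8;
	/* example payload sizes standing in for the ifla_vf_* structs */
	unsigned int mac = 36, vlan = 12, tx_rate = 8;

	unsigned int packed   = num_vfs * (mac + vlan + tx_rate);
	unsigned int per_attr = num_vfs * (total_size(mac) +
					   total_size(vlan) +
					   total_size(tx_rate));

	printf("packed structs (under-counts): %u bytes\n", packed);
	printf("one attribute per struct:      %u bytes\n", per_attr);
	return 0;
}
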