From a1889c0d2039a53ae04abb9f20c62500bd312bf3 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 28 Jul 2011 02:46:01 +0000 Subject: net: adjust array index Convert array index from the loop bound to the loop index. A simplified version of the semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e1,e2,ar; @@ for(e1 = 0; e1 < e2; e1++) { <... ar[ - e2 + e1 ] ...> } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f1d27f6c9351..283c0a26e03f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1718,7 +1718,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfcount[sfmode]--; for (j=0; jsfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST struct ip_sf_list *psf; -- cgit v1.2.3 From f2c31e32b378a6653f8de606149d963baf11d7d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jul 2011 19:00:53 +0000 Subject: net: fix NULL dereferences in check_peer_redir() Gergely Kalman reported crashes in check_peer_redir(). It appears commit f39925dbde778 (ipv4: Cache learned redirect information in inetpeer.) added a race, leading to possible NULL ptr dereference. Since we can now change dst neighbour, we should make sure a reader can safely use a neighbour. Add RCU protection to dst neighbour, and make sure check_peer_redir() can be called safely by different cpus in parallel. As neighbours are already freed after one RCU grace period, this patch should not add typical RCU penalty (cache cold effects) Many thanks to Gergely for providing a pretty report pointing to the bug. Reported-by: Gergely Kalman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 10 ++++++++-- net/ipv4/route.c | 14 ++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ccaaa851ab42..77d3eded665a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -204,9 +204,15 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } + rcu_read_lock(); neigh = dst_get_neighbour(dst); - if (neigh) - return neigh_output(neigh, skb); + if (neigh) { + int res = neigh_output(neigh, skb); + + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); if (net_ratelimit()) printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1730689f560e..6afc4eb50591 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1628,16 +1628,18 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; - struct neighbour *n; + struct neighbour *n, *old_n; dst_confirm(&rt->dst); - neigh_release(dst_get_neighbour(&rt->dst)); - dst_set_neighbour(&rt->dst, NULL); - rt->rt_gateway = peer->redirect_learned.a4; - rt_bind_neighbour(rt); - n = dst_get_neighbour(&rt->dst); + + n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + if (IS_ERR(n)) + return PTR_ERR(n); + old_n = xchg(&rt->dst._neighbour, n); + if (old_n) + neigh_release(old_n); if (!n || !(n->nud_state & NUD_VALID)) { if (n) neigh_event_send(n, NULL); -- cgit v1.2.3 From 6e5714eaf77d79ae1c8b47e3e040ff5411b717ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 3 Aug 2011 20:50:44 -0700 Subject: net: Compute protocol sequence numbers and fragment IDs using MD5. Computers have become a lot faster since we compromised on the partial MD4 hash which we use currently for performance reasons. MD5 is a much safer choice, and is inline with both RFC1948 and other ISS generators (OpenBSD, Solaris, etc.) Furthermore, only having 24-bits of the sequence number be truly unpredictable is a very serious limitation. So the periodic regeneration and 8-bit counter have been removed. We compute and use a full 32-bit sequence number. For ipv6, DCCP was found to use a 32-bit truncated initial sequence number (it needs 43-bits) and that is fixed here as well. Reported-by: Dan Kaminsky Tested-by: Willy Tarreau Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 1 + net/ipv4/inetpeer.c | 1 + net/ipv4/netfilter/nf_nat_proto_common.c | 1 + net/ipv4/route.c | 1 + net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 5 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3c0369a3a663..984ec656b03b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -21,6 +21,7 @@ #include #include +#include #include /* diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e38213817d0a..86f13c67ea85 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -19,6 +19,7 @@ #include #include #include +#include /* * Theory of operations. diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 3e61faf23a9a..f52d41ea0690 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6afc4eb50591..e3dec1c9f09d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -109,6 +109,7 @@ #include #endif #include +#include #define RT_FL_TOS(oldflp4) \ ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 955b8e65b69e..1c12b8ec849d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From d547f727df86059104af2234804fdd538e112015 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 7 Aug 2011 22:20:20 -0700 Subject: ipv4: fix the reusing of routing cache entries compare_keys and ip_route_input_common rely on rt_oif for distinguishing of input and output routes with same keys values. But sometimes the input route has also same hash chain (keyed by iif != 0) with the output routes (keyed by orig_oif=0). Problem visible if running with small number of rhash_entries. Fix them to use rt_route_iif instead. By this way input route can not be returned to users that request output route. The patch fixes the ip_rt_bug errors that were reported in ip_local_out context, mostly for 255.255.255.255 destinations. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e3dec1c9f09d..cb7efe0567f0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -731,6 +731,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | (rt1->rt_mark ^ rt2->rt_mark) | (rt1->rt_key_tos ^ rt2->rt_key_tos) | + (rt1->rt_route_iif ^ rt2->rt_route_iif) | (rt1->rt_oif ^ rt2->rt_oif) | (rt1->rt_iif ^ rt2->rt_iif)) == 0; } @@ -2321,8 +2322,8 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | (rth->rt_iif ^ iif) | - rth->rt_oif | (rth->rt_key_tos ^ tos)) == 0 && + rt_is_input_route(rth) && rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { -- cgit v1.2.3 From dd23198e58cd35259dd09e8892bbdb90f1d57748 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Sun, 7 Aug 2011 22:31:07 -0700 Subject: ipv4: Fix ip_getsockopt for IP_PKTOPTIONS IP_PKTOPTIONS is broken for 32-bit applications running in COMPAT mode on 64-bit kernels. This happens because msghdr's msg_flags field is always set to zero. When running in COMPAT mode this should be set to MSG_CMSG_COMPAT instead. Signed-off-by: Tiberiu Szocs-Mihai Signed-off-by: Daniel Baluta Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ab0c9efd1efa..8905e92f896a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1067,7 +1067,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt); */ static int do_ip_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen, unsigned flags) { struct inet_sock *inet = inet_sk(sk); int val; @@ -1240,7 +1240,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, msg.msg_control = optval; msg.msg_controllen = len; - msg.msg_flags = 0; + msg.msg_flags = flags; if (inet->cmsg_flags & IP_CMSG_PKTINFO) { struct in_pktinfo info; @@ -1294,7 +1294,7 @@ int ip_getsockopt(struct sock *sk, int level, { int err; - err = do_ip_getsockopt(sk, level, optname, optval, optlen); + err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && @@ -1327,7 +1327,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, return compat_mc_getsockopt(sk, level, optname, optval, optlen, ip_getsockopt); - err = do_ip_getsockopt(sk, level, optname, optval, optlen); + err = do_ip_getsockopt(sk, level, optname, optval, optlen, + MSG_CMSG_COMPAT); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ -- cgit v1.2.3 From 797fd3913abf2f7036003ab8d3d019cbea41affd Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 7 Aug 2011 09:11:00 +0000 Subject: netfilter: TCP and raw fix for ip_route_me_harder TCP in some cases uses different global (raw) socket to send RST and ACK. The transparent flag is not set there. Currently, it is a problem for rerouting after the previous change. Fix it by simplifying the checks in ip_route_me_harder and use FLOWI_FLAG_ANYSRC even for sockets. It looks safe because the initial routing allowed this source address to be used and now we just have to make sure the packet is rerouted. As a side effect this also allows rerouting for normal raw sockets that use spoofed source addresses which was not possible even before we eliminated the ip_route_input call. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/netfilter.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 2e97e3ec1eb7..929b27bdeb79 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -18,17 +18,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - __u8 flags = 0; + __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; unsigned int hh_len; - if (!skb->sk && addr_type != RTN_LOCAL) { - if (addr_type == RTN_UNSPEC) - addr_type = inet_addr_type(net, saddr); - if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) - flags |= FLOWI_FLAG_ANYSRC; - else - saddr = 0; - } + if (addr_type == RTN_UNSPEC) + addr_type = inet_addr_type(net, saddr); + if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) + flags |= FLOWI_FLAG_ANYSRC; + else + saddr = 0; /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. @@ -38,7 +36,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl4.flowi4_mark = skb->mark; - fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags; + fl4.flowi4_flags = flags; rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return -1; -- cgit v1.2.3 From 47670b767b1593433b516df7798df03f858278be Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 7 Aug 2011 09:16:09 +0000 Subject: ipv4: route non-local sources for raw socket The raw sockets can provide source address for routing but their privileges are not considered. We can provide non-local source address, make sure the FLOWI_FLAG_ANYSRC flag is set if socket has privileges for this, i.e. based on hdrincl (IP_HDRINCL) and transparent flags. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/raw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1457acb39cec..61714bd52925 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); + inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, + daddr, saddr, 0, 0); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); -- cgit v1.2.3 From d52fbfc9e5c7bb0b0dbc256edf17dee170ce839d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 7 Aug 2011 10:17:22 +0000 Subject: ipv4: use dst with ref during bcast/mcast loopback Make sure skb dst has reference when moving to another context. Currently, I don't see protocols that can hit it when sending broadcasts/multicasts to loopback using noref dsts, so it is just a precaution. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 77d3eded665a..8c6563361ab5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -122,6 +122,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); + skb_dst_force(newskb); netif_rx_ni(newskb); return 0; } -- cgit v1.2.3