From 68d6ac6d2740b6a55f3ae92a4e0be6d881904b32 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 15 Aug 2010 21:20:44 +0000
Subject: netlink: fix compat recvmsg

Since
commit 1dacc76d0014a034b8aca14237c127d7c19d7726
Author: Johannes Berg <johannes@sipsolutions.net>
Date:   Wed Jul 1 11:26:02 2009 +0000

    net/compat/wext: send different messages to compat tasks

we had a race condition when setting and then
restoring frag_list. Eric attempted to fix it,
but the fix created even worse problems.

However, the original motivation I had when I
added the code that turned out to be racy is
no longer clear to me, since we only copy up
to skb->len to userspace, which doesn't include
the frag_list length. As a result, not doing
any frag_list clearing and restoring avoids
the race condition, while not introducing any
other problems.

Additionally, while preparing this patch I found
that since none of the remaining netlink code is
really aware of the frag_list, we need to use the
original skb's information for packet information
and credentials. This fixes, for example, the
group information received by compat tasks.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: stable@kernel.org [2.6.31+, for 2.6.35 revert 1235f504aa]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 46 ++++++++++++++++------------------------------
 1 file changed, 16 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 8648a9922aab..980fe4ad0016 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1406,7 +1406,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	struct netlink_sock *nlk = nlk_sk(sk);
 	int noblock = flags&MSG_DONTWAIT;
 	size_t copied;
-	struct sk_buff *skb, *frag __maybe_unused = NULL;
+	struct sk_buff *skb, *data_skb;
 	int err;
 
 	if (flags&MSG_OOB)
@@ -1418,45 +1418,35 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	if (skb == NULL)
 		goto out;
 
+	data_skb = skb;
+
 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES
 	if (unlikely(skb_shinfo(skb)->frag_list)) {
-		bool need_compat = !!(flags & MSG_CMSG_COMPAT);
-
 		/*
-		 * If this skb has a frag_list, then here that means that
-		 * we will have to use the frag_list skb for compat tasks
-		 * and the regular skb for non-compat tasks.
+		 * If this skb has a frag_list, then here that means that we
+		 * will have to use the frag_list skb's data for compat tasks
+		 * and the regular skb's data for normal (non-compat) tasks.
 		 *
-		 * The skb might (and likely will) be cloned, so we can't
-		 * just reset frag_list and go on with things -- we need to
-		 * keep that. For the compat case that's easy -- simply get
-		 * a reference to the compat skb and free the regular one
-		 * including the frag. For the non-compat case, we need to
-		 * avoid sending the frag to the user -- so assign NULL but
-		 * restore it below before freeing the skb.
+		 * If we need to send the compat skb, assign it to the
+		 * 'data_skb' variable so that it will be used below for data
+		 * copying. We keep 'skb' for everything else, including
+		 * freeing both later.
 		 */
-		if (need_compat) {
-			struct sk_buff *compskb = skb_shinfo(skb)->frag_list;
-			skb_get(compskb);
-			kfree_skb(skb);
-			skb = compskb;
-		} else {
-			frag = skb_shinfo(skb)->frag_list;
-			skb_shinfo(skb)->frag_list = NULL;
-		}
+		if (flags & MSG_CMSG_COMPAT)
+			data_skb = skb_shinfo(skb)->frag_list;
 	}
 #endif
 
 	msg->msg_namelen = 0;
 
-	copied = skb->len;
+	copied = data_skb->len;
 	if (len < copied) {
 		msg->msg_flags |= MSG_TRUNC;
 		copied = len;
 	}
 
-	skb_reset_transport_header(skb);
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	skb_reset_transport_header(data_skb);
+	err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
 
 	if (msg->msg_name) {
 		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
@@ -1476,11 +1466,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	}
 	siocb->scm->creds = *NETLINK_CREDS(skb);
 	if (flags & MSG_TRUNC)
-		copied = skb->len;
-
-#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
-	skb_shinfo(skb)->frag_list = frag;
-#endif
+		copied = data_skb->len;
 
 	skb_free_datagram(sk, skb);
 
-- 
cgit v1.2.3


From f037590fff3005ce8a1513858d7d44f50053cc8f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 16 Aug 2010 03:25:00 +0000
Subject: rds: fix a leak of kernel memory

struct rds_rdma_notify contains a 32 bits hole on 64bit arches,
make sure it is zeroed before copying it to user.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/recv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/recv.c b/net/rds/recv.c
index 795a00b7f2cb..c93588c2d553 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -297,7 +297,7 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
 int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
 {
 	struct rds_notifier *notifier;
-	struct rds_rdma_notify cmsg;
+	struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */
 	unsigned int count = 0, max_messages = ~0U;
 	unsigned long flags;
 	LIST_HEAD(copy);
-- 
cgit v1.2.3


From 0ac820eebe9008094040955d294ef7b33b418413 Mon Sep 17 00:00:00 2001
From: Phil Oester <kernel@linuxace.com>
Date: Tue, 17 Aug 2010 18:45:08 +0000
Subject: vlan: Match underlying dev carrier on vlan add

When adding a new vlan, if the underlying interface has no carrier,
then the newly added vlan interface should also have no carrier.
At present, this is not true - the newly added vlan is added with
carrier up.  Fix by checking state of real device.

Signed-off-by: Phil Oester <kernel@linuxace.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 3d59c9bf8feb..3bccdd12a264 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -510,7 +510,8 @@ static int vlan_dev_open(struct net_device *dev)
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_join(dev);
 
-	netif_carrier_on(dev);
+	if (netif_carrier_ok(real_dev))
+		netif_carrier_on(dev);
 	return 0;
 
 clear_allmulti:
-- 
cgit v1.2.3


From 79c5f51c639021f7472591239c3867cee4b9ec02 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 18 Aug 2010 00:24:43 +0000
Subject: irda: fix a race in irlan_eth_xmit()

After skb is queued, its illegal to dereference it.

Cache skb->len into a temporary variable.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irlan/irlan_eth.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 9616c32d1076..5bb8353105cc 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -169,6 +169,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
 {
 	struct irlan_cb *self = netdev_priv(dev);
 	int ret;
+	unsigned int len;
 
 	/* skb headroom large enough to contain all IrDA-headers? */
 	if ((skb_headroom(skb) < self->max_header_size) || (skb_shared(skb))) {
@@ -188,6 +189,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
 
 	dev->trans_start = jiffies;
 
+	len = skb->len;
 	/* Now queue the packet in the transport layer */
 	if (self->use_udata)
 		ret = irttp_udata_request(self->tsap_data, skb);
@@ -209,7 +211,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
 		self->stats.tx_dropped++;
 	} else {
 		self->stats.tx_packets++;
-		self->stats.tx_bytes += skb->len;
+		self->stats.tx_bytes += len;
 	}
 
 	return NETDEV_TX_OK;
-- 
cgit v1.2.3


From cca77b7c81876d819a5806f408b3c29b5b61a815 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 23 Aug 2010 14:41:22 -0700
Subject: netfilter: fix CONFIG_COMPAT support

commit f3c5c1bfd430858d3a05436f82c51e53104feb6b
(netfilter: xtables: make ip_tables reentrant) forgot to
also compute the jumpstack size in the compat handlers.

Result is that "iptables -I INPUT -j userchain" turns into -j DROP.

Reported by Sebastian Roesner on #netfilter, closes
http://bugzilla.netfilter.org/show_bug.cgi?id=669.

Note: arptables change is compile-tested only.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 3 +++
 net/ipv4/netfilter/ip_tables.c  | 3 +++
 net/ipv6/netfilter/ip6_tables.c | 3 +++
 3 files changed, 9 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 51d6c3167975..e8f4f9a57f12 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1420,6 +1420,9 @@ static int translate_compat_table(const char *name,
 		if (ret != 0)
 			break;
 		++i;
+		if (strcmp(arpt_get_target(iter1)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 	if (ret) {
 		/*
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 97b64b22c412..d163f2e3b2e9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1751,6 +1751,9 @@ translate_compat_table(struct net *net,
 		if (ret != 0)
 			break;
 		++i;
+		if (strcmp(ipt_get_target(iter1)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 	if (ret) {
 		/*
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 29a7bca29e3f..8e754be92c24 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1766,6 +1766,9 @@ translate_compat_table(struct net *net,
 		if (ret != 0)
 			break;
 		++i;
+		if (strcmp(ip6t_get_target(iter1)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 	if (ret) {
 		/*
-- 
cgit v1.2.3


From 4c3a76abd379d9a4668b2d417baa991de9757dc2 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sun, 22 Aug 2010 19:03:26 +0000
Subject: bridge: netfilter: fix a memory leak

nf_bridge_alloc() always reset the skb->nf_bridge, so we should always
put the old one.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 2c911c0759c2..5ed00bd7009f 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -162,8 +162,8 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
 		if (tmp) {
 			memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
 			atomic_set(&tmp->use, 1);
-			nf_bridge_put(nf_bridge);
 		}
+		nf_bridge_put(nf_bridge);
 		nf_bridge = tmp;
 	}
 	return nf_bridge;
-- 
cgit v1.2.3


From ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 25 Aug 2010 02:27:49 -0700
Subject: tcp: Combat per-cpu skew in orphan tests.

As reported by Anton Blanchard when we use
percpu_counter_read_positive() to make our orphan socket limit checks,
the check can be off by up to num_cpus_online() * batch (which is 32
by default) which on a 128 cpu machine can be as large as the default
orphan limit itself.

Fix this by doing the full expensive sum check if the optimized check
triggers.

Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/ipv4/tcp.c       | 5 +----
 net/ipv4/tcp_timer.c | 8 ++++----
 2 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 176e11aaea77..197b9b77fa3e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2011,11 +2011,8 @@ adjudge_to_death:
 		}
 	}
 	if (sk->sk_state != TCP_CLOSE) {
-		int orphan_count = percpu_counter_read_positive(
-						sk->sk_prot->orphan_count);
-
 		sk_mem_reclaim(sk);
-		if (tcp_too_many_orphans(sk, orphan_count)) {
+		if (tcp_too_many_orphans(sk, 0)) {
 			if (net_ratelimit())
 				printk(KERN_INFO "TCP: too many of orphaned "
 				       "sockets\n");
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 808bb920c9f5..c35b469e851c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -66,18 +66,18 @@ static void tcp_write_err(struct sock *sk)
 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int orphans = percpu_counter_read_positive(&tcp_orphan_count);
+	int shift = 0;
 
 	/* If peer does not open window for long time, or did not transmit
 	 * anything for long time, penalize it. */
 	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
-		orphans <<= 1;
+		shift++;
 
 	/* If some dubious ICMP arrived, penalize even more. */
 	if (sk->sk_err_soft)
-		orphans <<= 1;
+		shift++;
 
-	if (tcp_too_many_orphans(sk, orphans)) {
+	if (tcp_too_many_orphans(sk, shift)) {
 		if (net_ratelimit())
 			printk(KERN_INFO "Out of socket memory\n");
 
-- 
cgit v1.2.3


From c5ed63d66f24fd4f7089b5a6e087b0ce7202aa8e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 25 Aug 2010 23:02:17 -0700
Subject: tcp: fix three tcp sysctls tuning

As discovered by Anton Blanchard, current code to autotune
tcp_death_row.sysctl_max_tw_buckets, sysctl_tcp_max_orphans and
sysctl_max_syn_backlog makes little sense.

The bigger a page is, the less tcp_max_orphans is : 4096 on a 512GB
machine in Anton's case.

(tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket))
is much bigger if spinlock debugging is on. Its wrong to select bigger
limits in this case (where kernel structures are also bigger)

bhash_size max is 65536, and we get this value even for small machines.

A better ground is to use size of ehash table, this also makes code
shorter and more obvious.

Based on a patch from Anton, and another from David.

Reported-and-tested-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 197b9b77fa3e..e2add5ff9cb1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3209,7 +3209,7 @@ void __init tcp_init(void)
 {
 	struct sk_buff *skb = NULL;
 	unsigned long nr_pages, limit;
-	int order, i, max_share;
+	int i, max_share, cnt;
 	unsigned long jiffy = jiffies;
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -3258,22 +3258,12 @@ void __init tcp_init(void)
 		INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
 	}
 
-	/* Try to be a bit smarter and adjust defaults depending
-	 * on available memory.
-	 */
-	for (order = 0; ((1 << order) << PAGE_SHIFT) <
-			(tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket));
-			order++)
-		;
-	if (order >= 4) {
-		tcp_death_row.sysctl_max_tw_buckets = 180000;
-		sysctl_tcp_max_orphans = 4096 << (order - 4);
-		sysctl_max_syn_backlog = 1024;
-	} else if (order < 3) {
-		tcp_death_row.sysctl_max_tw_buckets >>= (3 - order);
-		sysctl_tcp_max_orphans >>= (3 - order);
-		sysctl_max_syn_backlog = 128;
-	}
+
+	cnt = tcp_hashinfo.ehash_mask + 1;
+
+	tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
+	sysctl_tcp_max_orphans = cnt / 2;
+	sysctl_max_syn_backlog = max(128, cnt / 256);
 
 	/* Set the pressure threshold to be a fraction of global memory that
 	 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
-- 
cgit v1.2.3


From d84ba638e4ba3c40023ff997aa5e8d3ed002af36 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 24 Aug 2010 16:05:48 +0000
Subject: tcp: select(writefds) don't hang up when a peer close connection

This issue come from ruby language community. Below test program
hang up when only run on Linux.

	% uname -mrsv
	Linux 2.6.26-2-486 #1 Sat Dec 26 08:37:39 UTC 2009 i686
	% ruby -rsocket -ve '
	BasicSocket.do_not_reverse_lookup = true
	serv = TCPServer.open("127.0.0.1", 0)
	s1 = TCPSocket.open("127.0.0.1", serv.addr[1])
	s2 = serv.accept
	s2.close
	s1.write("a") rescue p $!
	s1.write("a") rescue p $!
	Thread.new {
	  s1.write("a")
	}.join'
	ruby 1.9.3dev (2010-07-06 trunk 28554) [i686-linux]
	#<Errno::EPIPE: Broken pipe>
	[Hang Here]

FreeBSD, Solaris, Mac doesn't. because Ruby's write() method call
select() internally. and tcp_poll has a bug.

SUS defined 'ready for writing' of select() as following.

|  A descriptor shall be considered ready for writing when a call to an output
|  function with O_NONBLOCK clear would not block, whether or not the function
|  would transfer data successfully.

That said, EPIPE situation is clearly one of 'ready for writing'.

We don't have read-side issue because tcp_poll() already has read side
shutdown care.

|        if (sk->sk_shutdown & RCV_SHUTDOWN)
|                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

So, Let's insert same logic in write side.

- reference url
  http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31065
  http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31068

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e2add5ff9cb1..3fb1428e526e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -451,7 +451,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
 					mask |= POLLOUT | POLLWRNORM;
 			}
-		}
+		} else
+			mask |= POLLOUT | POLLWRNORM;
 
 		if (tp->urg_data & TCP_URG_VALID)
 			mask |= POLLPRI;
-- 
cgit v1.2.3


From bfc960a8eec023a170a80697fe65157cd4f44f81 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 25 Aug 2010 23:44:35 +0000
Subject: l2tp: test for ethernet header in l2tp_eth_dev_recv()

close https://bugzilla.kernel.org/show_bug.cgi?id=16529

Before calling dev_forward_skb(), we should make sure skb head contains
at least an ethernet header, even if length included in upper layer said
so. Use pskb_may_pull() to make sure this ethernet header is present in
skb head.

Reported-by: Thomas Heil <heil@terminal-consulting.de>
Reported-by: Ian Campbell <Ian.Campbell@eu.citrix.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 58c6c4cda73b..1ae697681bc7 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -132,7 +132,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
 		printk("\n");
 	}
 
-	if (data_len < ETH_HLEN)
+	if (!pskb_may_pull(skb, sizeof(ETH_HLEN)))
 		goto error;
 
 	secpath_reset(skb);
-- 
cgit v1.2.3


From d71b0e9c0028f3af910226f995e0074873e16979 Mon Sep 17 00:00:00 2001
From: Bernard Pidoux F6BVP <f6bvp@free.fr>
Date: Thu, 26 Aug 2010 11:40:00 +0000
Subject: ax25: missplaced sock_put(sk)

This patch moves a missplaced sock_put(sk) after
bh_unlock_sock(sk)
like in other parts of AX25 driver.

Signed-off-by: Bernard Pidoux <f6bvp@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/ax25_ds_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 2ce79df00680..c7d81436213d 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -112,8 +112,8 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
 			if (sk) {
 				sock_hold(sk);
 				ax25_destroy_socket(ax25);
-				sock_put(sk);
 				bh_unlock_sock(sk);
+				sock_put(sk);
 			} else
 				ax25_destroy_socket(ax25);
 			return;
-- 
cgit v1.2.3


From 7e368739e3b3f1d7944794c178a15f05829b56bc Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Thu, 26 Aug 2010 16:11:08 -0700
Subject: net/caif/cfrfml.c: use asm/unaligned.h

caif does not build on ia64 starting with 2.6.32-rc1.  Using
asm/unaligned.h instead of linux/unaligned/le_byteshift.h fixes the issue.

include/linux/unaligned/le_byteshift.h:40:50: error: redefinition of 'get_unaligned_le16'
include/linux/unaligned/le_byteshift.h:45:50: error: redefinition of 'get_unaligned_le32'
include/linux/unaligned/le_byteshift.h:50:50: error: redefinition of 'get_unaligned_le64'
include/linux/unaligned/le_byteshift.h:55:51: error: redefinition of 'put_unaligned_le16'
include/linux/unaligned/le_byteshift.h:60:51: error: redefinition of 'put_unaligned_le32'
include/linux/unaligned/le_byteshift.h:65:51: error: redefinition of 'put_unaligned_le64'
include/linux/unaligned/le_struct.h:31:51: note: previous definition of 'put_unaligned_le64' was here

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfrfml.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index eb1602022ac0..9a699242d104 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -7,7 +7,7 @@
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
-#include <linux/unaligned/le_byteshift.h>
+#include <asm/unaligned.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfsrvl.h>
 #include <net/caif/cfpkt.h>
-- 
cgit v1.2.3


From c34186ed008229e7f7e3f1de8e6acf6374995358 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Fri, 27 Aug 2010 19:31:56 -0700
Subject: net/ipv4: Eliminate kstrdup memory leak

The string clone is only used as a temporary copy of the argument val
within the while loop, and so it should be freed before leaving the
function.  The call to strsep, however, modifies clone, so a pointer to the
front of the string is kept in saved_clone, to make it possible to free it.

The sematic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@r exists@
local idexpression x;
expression E;
identifier l;
statement S;
@@

*x= \(kasprintf\|kstrdup\)(...);
...
if (x == NULL) S
... when != kfree(x)
    when != E = x
if (...) {
  <... when != kfree(x)
* goto l;
  ...>
* return ...;
}
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cong.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 0ec9bd0ae94f..850c737e08e2 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -196,10 +196,10 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
 int tcp_set_allowed_congestion_control(char *val)
 {
 	struct tcp_congestion_ops *ca;
-	char *clone, *name;
+	char *saved_clone, *clone, *name;
 	int ret = 0;
 
-	clone = kstrdup(val, GFP_USER);
+	saved_clone = clone = kstrdup(val, GFP_USER);
 	if (!clone)
 		return -ENOMEM;
 
@@ -226,6 +226,7 @@ int tcp_set_allowed_congestion_control(char *val)
 	}
 out:
 	spin_unlock(&tcp_cong_list_lock);
+	kfree(saved_clone);
 
 	return ret;
 }
-- 
cgit v1.2.3


From 071249b1d501b1f31a6b1af3fbcbe03158a84e5c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 25 Aug 2010 14:47:38 +0200
Subject: mac80211: delete work timer

The new workqueue changes helped me find this bug
that's been lingering since the changes to the work
processing in mac80211 -- the work timer is never
deleted properly. Do that to avoid having it fire
after all data structures have been freed. It can't
be re-armed because all it will do, if running, is
schedule the work, but that gets flushed later and
won't have anything to do since all work items are
gone by now (by way of interface removal).

Cc: stable@kernel.org [2.6.34+]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 798a91b100cc..ded5c3843e06 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -732,6 +732,12 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 
 	rtnl_unlock();
 
+	/*
+	 * Now all work items will be gone, but the
+	 * timer might still be armed, so delete it
+	 */
+	del_timer_sync(&local->work_timer);
+
 	cancel_work_sync(&local->reconfig_filter);
 
 	ieee80211_clear_tx_pending(local);
-- 
cgit v1.2.3


From 42da2f948d949efd0111309f5827bf0298bcc9a4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 30 Aug 2010 12:24:54 +0200
Subject: wireless extensions: fix kernel heap content leak

Wireless extensions have an unfortunate, undocumented
requirement which requires drivers to always fill
iwp->length when returning a successful status. When
a driver doesn't do this, it leads to a kernel heap
content leak when userspace offers a larger buffer
than would have been necessary.

Arguably, this is a driver bug, as it should, if it
returns 0, fill iwp->length, even if it separately
indicated that the buffer contents was not valid.

However, we can also at least avoid the memory content
leak if the driver doesn't do this by setting the iwp
length to max_tokens, which then reflects how big the
buffer is that the driver may fill, regardless of how
big the userspace buffer is.

To illustrate the point, this patch also fixes a
corresponding cfg80211 bug (since this requirement
isn't documented nor was ever pointed out by anyone
during code review, I don't trust all drivers nor
all cfg80211 handlers to implement it correctly).

Cc: stable@kernel.org [all the way back]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/wext-compat.c |  3 +++
 net/wireless/wext-core.c   | 16 ++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'net')

diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index bb5e0a5ecfa1..7e5c3a45f811 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1420,6 +1420,9 @@ int cfg80211_wext_giwessid(struct net_device *dev,
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 
+	data->flags = 0;
+	data->length = 0;
+
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_ADHOC:
 		return cfg80211_ibss_wext_giwessid(dev, info, data, ssid);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 0ef17bc42bac..8f5116f5af19 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -782,6 +782,22 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd,
 		}
 	}
 
+	if (IW_IS_GET(cmd) && !(descr->flags & IW_DESCR_FLAG_NOMAX)) {
+		/*
+		 * If this is a GET, but not NOMAX, it means that the extra
+		 * data is not bounded by userspace, but by max_tokens. Thus
+		 * set the length to max_tokens. This matches the extra data
+		 * allocation.
+		 * The driver should fill it with the number of tokens it
+		 * provided, and it may check iwp->length rather than having
+		 * knowledge of max_tokens. If the driver doesn't change the
+		 * iwp->length, this ioctl just copies back max_token tokens
+		 * filled with zeroes. Hopefully the driver isn't claiming
+		 * them to be valid data.
+		 */
+		iwp->length = descr->max_tokens;
+	}
+
 	err = handler(dev, info, (union iwreq_data *) iwp, extra);
 
 	iwp->length += essid_compat;
-- 
cgit v1.2.3


From 628e300cccaa628d8fb92aa28cb7530a3d5f2257 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 30 Aug 2010 18:35:24 -0700
Subject: irda: Correctly clean up self->ias_obj on irda_bind() failure.

If irda_open_tsap() fails, the irda_bind() code tries to destroy
the ->ias_obj object by hand, but does so wrongly.

In particular, it fails to a) release the hashbin attached to the
object and b) reset the self->ias_obj pointer to NULL.

Fix both problems by using irias_delete_object() and explicitly
setting self->ias_obj to NULL, just as irda_release() does.

Reported-by: Tavis Ormandy <taviso@cmpxchg8b.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/af_irda.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 79986a674f6e..fd55b5135de5 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -824,8 +824,8 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 	err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name);
 	if (err < 0) {
-		kfree(self->ias_obj->name);
-		kfree(self->ias_obj);
+		irias_delete_object(self->ias_obj);
+		self->ias_obj = NULL;
 		goto out;
 	}
 
-- 
cgit v1.2.3


From b963ea89f00f13c648af4082e5efb2172d369727 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 30 Aug 2010 19:08:01 -0700
Subject: netlink: Make NETLINK_USERSOCK work again.

Once we started enforcing the a nl_table[] entry exist for
a protocol, NETLINK_USERSOCK stopped working.  Add a dummy
table entry so that it works again.

Reported-by: Thomas Voegtle <tv@lio96.de>
Tested-by: Thomas Voegtle <tv@lio96.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 980fe4ad0016..cd96ed3ccee4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2102,6 +2102,26 @@ static void __net_exit netlink_net_exit(struct net *net)
 #endif
 }
 
+static void __init netlink_add_usersock_entry(void)
+{
+	unsigned long *listeners;
+	int groups = 32;
+
+	listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head),
+			    GFP_KERNEL);
+	if (!listeners)
+		panic("netlink_add_usersock_entry: Cannot allocate listneres\n");
+
+	netlink_table_grab();
+
+	nl_table[NETLINK_USERSOCK].groups = groups;
+	nl_table[NETLINK_USERSOCK].listeners = listeners;
+	nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
+	nl_table[NETLINK_USERSOCK].registered = 1;
+
+	netlink_table_ungrab();
+}
+
 static struct pernet_operations __net_initdata netlink_net_ops = {
 	.init = netlink_net_init,
 	.exit = netlink_net_exit,
@@ -2150,6 +2170,8 @@ static int __init netlink_proto_init(void)
 		hash->rehash_time = jiffies;
 	}
 
+	netlink_add_usersock_entry();
+
 	sock_register(&netlink_family_ops);
 	register_pernet_subsys(&netlink_net_ops);
 	/* The netlink device handler may be needed early. */
-- 
cgit v1.2.3


From c3d34d5d9654ec9c2510f9341bfb1030b8f029d1 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Mon, 30 Aug 2010 17:36:40 -0400
Subject: wireless: register wiphy rfkill w/o holding cfg80211_mutex

Otherwise lockdep complains...

https://bugzilla.kernel.org/show_bug.cgi?id=17311

[ INFO: possible circular locking dependency detected ]
2.6.36-rc2-git4 #12
-------------------------------------------------------
kworker/0:3/3630 is trying to acquire lock:
 (rtnl_mutex){+.+.+.}, at: [<ffffffff813396c7>] rtnl_lock+0x12/0x14

but task is already holding lock:
 (rfkill_global_mutex){+.+.+.}, at: [<ffffffffa014b129>]
rfkill_switch_all+0x24/0x49 [rfkill]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (rfkill_global_mutex){+.+.+.}:
       [<ffffffff81079ad7>] lock_acquire+0x120/0x15b
       [<ffffffff813ae869>] __mutex_lock_common+0x54/0x52e
       [<ffffffff813aede9>] mutex_lock_nested+0x34/0x39
       [<ffffffffa014b4ab>] rfkill_register+0x2b/0x29c [rfkill]
       [<ffffffffa0185ba0>] wiphy_register+0x1ae/0x270 [cfg80211]
       [<ffffffffa0206f01>] ieee80211_register_hw+0x1b4/0x3cf [mac80211]
       [<ffffffffa0292e98>] iwl_ucode_callback+0x9e9/0xae3 [iwlagn]
       [<ffffffff812d3e9d>] request_firmware_work_func+0x54/0x6f
       [<ffffffff81065d15>] kthread+0x8c/0x94
       [<ffffffff8100ac24>] kernel_thread_helper+0x4/0x10

-> #1 (cfg80211_mutex){+.+.+.}:
       [<ffffffff81079ad7>] lock_acquire+0x120/0x15b
       [<ffffffff813ae869>] __mutex_lock_common+0x54/0x52e
       [<ffffffff813aede9>] mutex_lock_nested+0x34/0x39
       [<ffffffffa018605e>] cfg80211_get_dev_from_ifindex+0x1b/0x7c [cfg80211]
       [<ffffffffa0189f36>] cfg80211_wext_giwscan+0x58/0x990 [cfg80211]
       [<ffffffff8139a3ce>] ioctl_standard_iw_point+0x1a8/0x272
       [<ffffffff8139a529>] ioctl_standard_call+0x91/0xa7
       [<ffffffff8139a687>] T.723+0xbd/0x12c
       [<ffffffff8139a727>] wext_handle_ioctl+0x31/0x6d
       [<ffffffff8133014e>] dev_ioctl+0x63d/0x67a
       [<ffffffff8131afd9>] sock_ioctl+0x48/0x21d
       [<ffffffff81102abd>] do_vfs_ioctl+0x4ba/0x509
       [<ffffffff81102b5d>] sys_ioctl+0x51/0x74
       [<ffffffff81009e02>] system_call_fastpath+0x16/0x1b

-> #0 (rtnl_mutex){+.+.+.}:
       [<ffffffff810796b0>] __lock_acquire+0xa93/0xd9a
       [<ffffffff81079ad7>] lock_acquire+0x120/0x15b
       [<ffffffff813ae869>] __mutex_lock_common+0x54/0x52e
       [<ffffffff813aede9>] mutex_lock_nested+0x34/0x39
       [<ffffffff813396c7>] rtnl_lock+0x12/0x14
       [<ffffffffa0185cb5>] cfg80211_rfkill_set_block+0x1a/0x7b [cfg80211]
       [<ffffffffa014aed0>] rfkill_set_block+0x80/0xd5 [rfkill]
       [<ffffffffa014b07e>] __rfkill_switch_all+0x3f/0x6f [rfkill]
       [<ffffffffa014b13d>] rfkill_switch_all+0x38/0x49 [rfkill]
       [<ffffffffa014b821>] rfkill_op_handler+0x105/0x136 [rfkill]
       [<ffffffff81060708>] process_one_work+0x248/0x403
       [<ffffffff81062620>] worker_thread+0x139/0x214
       [<ffffffff81065d15>] kthread+0x8c/0x94
       [<ffffffff8100ac24>] kernel_thread_helper+0x4/0x10

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
---
 net/wireless/core.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 541e2fff5e9c..d6d046b9f6f2 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -475,12 +475,10 @@ int wiphy_register(struct wiphy *wiphy)
 	mutex_lock(&cfg80211_mutex);
 
 	res = device_add(&rdev->wiphy.dev);
-	if (res)
-		goto out_unlock;
-
-	res = rfkill_register(rdev->rfkill);
-	if (res)
-		goto out_rm_dev;
+	if (res) {
+		mutex_unlock(&cfg80211_mutex);
+		return res;
+	}
 
 	/* set up regulatory info */
 	wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE);
@@ -509,13 +507,18 @@ int wiphy_register(struct wiphy *wiphy)
 	cfg80211_debugfs_rdev_add(rdev);
 	mutex_unlock(&cfg80211_mutex);
 
+	/*
+	 * due to a locking dependency this has to be outside of the
+	 * cfg80211_mutex lock
+	 */
+	res = rfkill_register(rdev->rfkill);
+	if (res)
+		goto out_rm_dev;
+
 	return 0;
 
 out_rm_dev:
 	device_del(&rdev->wiphy.dev);
-
-out_unlock:
-	mutex_unlock(&cfg80211_mutex);
 	return res;
 }
 EXPORT_SYMBOL(wiphy_register);
-- 
cgit v1.2.3


From 0f04cfd098fb81fded74e78ea1a1b86cc6c6c31e Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Tue, 31 Aug 2010 13:21:42 +0000
Subject: net sched: fix kernel leak in act_police

While reviewing commit 1c40be12f7d8ca1d387510d39787b12e512a7ce8, I
 audited other users of tc_action_ops->dump for information leaks.

 That commit covered almost all of them but act_police still had a leak.

 opt.limit and opt.capab aren't zeroed out before the structure is
 passed out.

 This patch uses the C99 initializers to zero everything unused out.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_police.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 537a48732e9e..7ebf7439b478 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -350,22 +350,19 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_police *police = a->priv;
-	struct tc_police opt;
-
-	opt.index = police->tcf_index;
-	opt.action = police->tcf_action;
-	opt.mtu = police->tcfp_mtu;
-	opt.burst = police->tcfp_burst;
-	opt.refcnt = police->tcf_refcnt - ref;
-	opt.bindcnt = police->tcf_bindcnt - bind;
+	struct tc_police opt = {
+		.index = police->tcf_index,
+		.action = police->tcf_action,
+		.mtu = police->tcfp_mtu,
+		.burst = police->tcfp_burst,
+		.refcnt = police->tcf_refcnt - ref,
+		.bindcnt = police->tcf_bindcnt - bind,
+	};
+
 	if (police->tcfp_R_tab)
 		opt.rate = police->tcfp_R_tab->rate;
-	else
-		memset(&opt.rate, 0, sizeof(opt.rate));
 	if (police->tcfp_P_tab)
 		opt.peakrate = police->tcfp_P_tab->rate;
-	else
-		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
 	NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
 	if (police->tcfp_result)
 		NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result);
-- 
cgit v1.2.3


From 3b2eb6131e2f6ff646abb0fc69648179b8b70216 Mon Sep 17 00:00:00 2001
From: Michal Soltys <soltys@ziu.info>
Date: Mon, 30 Aug 2010 11:34:10 +0000
Subject: net/sched/sch_hfsc.c: initialize parent's cl_cfmin properly in
 init_vf()

This patch fixes init_vf() function, so on each new backlog period parent's
cl_cfmin is properly updated (including further propgation towards the root),
even if the activated leaf has no upperlimit curve defined.

Signed-off-by: Michal Soltys <soltys@ziu.info>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_hfsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index abd904be4287..47496098d35c 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -761,8 +761,8 @@ init_vf(struct hfsc_class *cl, unsigned int len)
 		if (f != cl->cl_f) {
 			cl->cl_f = f;
 			cftree_update(cl);
-			update_cfmin(cl->cl_parent);
 		}
+		update_cfmin(cl->cl_parent);
 	}
 }
 
-- 
cgit v1.2.3


From 928497f0209027ccd649480a38a499fab9c3f6f6 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 31 Aug 2010 05:54:00 +0000
Subject: xfrm_user: avoid a warning with some compiler

Attached is a small patch to remove a warning ("warning: ISO C90 forbids
mixed declarations and code" with gcc 4.3.2).

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b14ed4b1f27c..8bae6b22c846 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1801,7 +1801,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_user_expire *ue = nlmsg_data(nlh);
 	struct xfrm_usersa_info *p = &ue->state;
 	struct xfrm_mark m;
-	u32 mark = xfrm_mark_get(attrs, &m);;
+	u32 mark = xfrm_mark_get(attrs, &m);
 
 	x = xfrm_state_lookup(net, mark, &p->id.daddr, p->id.spi, p->id.proto, p->family);
 
-- 
cgit v1.2.3


From 750e9fad8c7f44e0005ffb7195f72dd76978c2cf Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 31 Aug 2010 05:50:43 +0000
Subject: ipv4: minor fix about RPF in help of Kconfig

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7c3a7d191249..571f8950ed06 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -46,7 +46,7 @@ config IP_ADVANCED_ROUTER
 	  rp_filter on use:
 
 	  echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter
-	   and
+	   or
 	  echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter
 
 	  Note that some distributions enable it in startup scripts.
-- 
cgit v1.2.3


From 87f94b4e91dc042620c527f3c30c37e5127ef757 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 1 Sep 2010 18:06:39 -0700
Subject: bridge: Clear INET control block of SKBs passed into ip_fragment().

In a similar vain to commit 17762060c25590bfddd68cc1131f28ec720f405f
("bridge: Clear IPCB before possible entry into IP stack")

Any time we call into the IP stack we have to make sure the state
there is as expected by the ipv4 code.

With help from Eric Dumazet and Herbert Xu.

Reported-by: Bandan Das <bandan.das@stratus.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 5ed00bd7009f..137f23259a93 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -761,9 +761,11 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
 	if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
 	    skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
-	    !skb_is_gso(skb))
+	    !skb_is_gso(skb)) {
+		/* BUG: Should really parse the IP options here. */
+		memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 		return ip_fragment(skb, br_dev_queue_push_xmit);
-	else
+	} else
 		return br_dev_queue_push_xmit(skb);
 }
 #else
-- 
cgit v1.2.3


From 3d3be4333fdf6faa080947b331a6a19bce1a4f57 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 1 Sep 2010 00:50:51 +0000
Subject: gro: fix different skb headrooms

Packets entering GRO might have different headrooms, even for a given
flow (because of implementation details in drivers, like copybreak).
We cant force drivers to deliver packets with a fixed headroom.

1) fix skb_segment()

skb_segment() makes the false assumption headrooms of fragments are same
than the head. When CHECKSUM_PARTIAL is used, this can give csum_start
errors, and crash later in skb_copy_and_csum_dev()

2) allocate a minimal skb for head of frag_list

skb_gro_receive() uses netdev_alloc_skb(headroom + skb_gro_offset(p)) to
allocate a fresh skb. This adds NET_SKB_PAD to a padding already
provided by netdevice, depending on various things, like copybreak.

Use alloc_skb() to allocate an exact padding, to reduce cache line
needs:
NET_SKB_PAD + NET_IP_ALIGN

bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=16626

Many thanks to Plamen Petrov, testing many debugging patches !
With help of Jarek Poplawski.

Reported-by: Plamen Petrov <pvp-lsts@fs.uni-ruse.bg>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3a2513f0d0c3..26396ff67cf9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2573,6 +2573,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		__copy_skb_header(nskb, skb);
 		nskb->mac_len = skb->mac_len;
 
+		/* nskb and skb might have different headroom */
+		if (nskb->ip_summed == CHECKSUM_PARTIAL)
+			nskb->csum_start += skb_headroom(nskb) - headroom;
+
 		skb_reset_mac_header(nskb);
 		skb_set_network_header(nskb, skb->mac_len);
 		nskb->transport_header = (nskb->network_header +
@@ -2702,8 +2706,8 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	} else if (skb_gro_len(p) != pinfo->gso_size)
 		return -E2BIG;
 
-	headroom = skb_headroom(p);
-	nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
+	headroom = NET_SKB_PAD + NET_IP_ALIGN;
+	nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC);
 	if (unlikely(!nskb))
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 7bcbf81a2296a8f71342445560dcbe16100b567c Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Wed, 1 Sep 2010 23:07:10 +0000
Subject: ipvs: avoid oops for passive FTP

Fix Passive FTP problem in ip_vs_ftp:

- Do not oops in nf_nat_set_seq_adjust (adjust_tcp_sequence) when
  iptable_nat module is not loaded

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_ftp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index f228a17ec649..33b329bfc2d2 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -45,6 +45,7 @@
 #include <linux/netfilter.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <linux/gfp.h>
 #include <net/protocol.h>
@@ -359,7 +360,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		buf_len = strlen(buf);
 
 		ct = nf_ct_get(skb, &ctinfo);
-		if (ct && !nf_ct_is_untracked(ct)) {
+		if (ct && !nf_ct_is_untracked(ct) && nfct_nat(ct)) {
 			/* If mangling fails this function will return 0
 			 * which will cause the packet to be dropped.
 			 * Mangling can only fail under memory pressure,
-- 
cgit v1.2.3


From 0b5d404e349c0236b11466c0a4785520c0be6982 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Thu, 2 Sep 2010 13:22:11 -0700
Subject: pkt_sched: Fix lockdep warning on est_tree_lock in gen_estimator

This patch fixes a lockdep warning:

[  516.287584] =========================================================
[  516.288386] [ INFO: possible irq lock inversion dependency detected ]
[  516.288386] 2.6.35b #7
[  516.288386] ---------------------------------------------------------
[  516.288386] swapper/0 just changed the state of lock:
[  516.288386]  (&qdisc_tx_lock){+.-...}, at: [<c12eacda>] est_timer+0x62/0x1b4
[  516.288386] but this lock took another, SOFTIRQ-unsafe lock in the past:
[  516.288386]  (est_tree_lock){+.+...}
[  516.288386]
[  516.288386] and interrupts could create inverse lock ordering between them.
...

So, est_tree_lock needs BH protection because it's taken by
qdisc_tx_lock, which is used both in BH and process contexts.
(Full warning with this patch at netdev, 02 Sep 2010.)

Fixes commit: ae638c47dc040b8def16d05dc6acdd527628f231
("pkt_sched: gen_estimator: add a new lock")

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/gen_estimator.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 9fbe7f7429b0..6743146e4d6b 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -232,7 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 	est->last_packets = bstats->packets;
 	est->avpps = rate_est->pps<<10;
 
-	spin_lock(&est_tree_lock);
+	spin_lock_bh(&est_tree_lock);
 	if (!elist[idx].timer.function) {
 		INIT_LIST_HEAD(&elist[idx].list);
 		setup_timer(&elist[idx].timer, est_timer, idx);
@@ -243,7 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 
 	list_add_rcu(&est->list, &elist[idx].list);
 	gen_add_node(est);
-	spin_unlock(&est_tree_lock);
+	spin_unlock_bh(&est_tree_lock);
 
 	return 0;
 }
@@ -270,7 +270,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 {
 	struct gen_estimator *e;
 
-	spin_lock(&est_tree_lock);
+	spin_lock_bh(&est_tree_lock);
 	while ((e = gen_find_node(bstats, rate_est))) {
 		rb_erase(&e->node, &est_root);
 
@@ -281,7 +281,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 		list_del_rcu(&e->list);
 		call_rcu(&e->e_rcu, __gen_kill_estimator);
 	}
-	spin_unlock(&est_tree_lock);
+	spin_unlock_bh(&est_tree_lock);
 }
 EXPORT_SYMBOL(gen_kill_estimator);
 
@@ -320,9 +320,9 @@ bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
 
 	ASSERT_RTNL();
 
-	spin_lock(&est_tree_lock);
+	spin_lock_bh(&est_tree_lock);
 	res = gen_find_node(bstats, rate_est) != NULL;
-	spin_unlock(&est_tree_lock);
+	spin_unlock_bh(&est_tree_lock);
 
 	return res;
 }
-- 
cgit v1.2.3