From 648ae8e53d58ed1b667db173a2d4ff2132a3b529 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Wed, 9 Nov 2011 19:37:35 +0100
Subject: netfilter: ipset: suppress compile-time warnings in
 ip_set_hash_ipport*.c

warning: 'ip_to' may be used uninitialized in this function

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_hash_ipport.c    | 2 +-
 net/netfilter/ipset/ip_set_hash_ipportip.c  | 2 +-
 net/netfilter/ipset/ip_set_hash_ipportnet.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 6ee10f5d59bd..37d667e3f6f8 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -158,7 +158,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport4_elem data = { };
-	u32 ip, ip_to, p = 0, port, port_to;
+	u32 ip, ip_to = 0, p = 0, port, port_to;
 	u32 timeout = h->timeout;
 	bool with_ports = false;
 	int ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index fb90e344e907..e69e2718fbe1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -162,7 +162,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip4_elem data = { };
-	u32 ip, ip_to, p = 0, port, port_to;
+	u32 ip, ip_to = 0, p = 0, port, port_to;
 	u32 timeout = h->timeout;
 	bool with_ports = false;
 	int ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index deb3e3dfa5fc..64199b4e93c9 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -184,7 +184,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet4_elem data = { .cidr = HOST_MASK };
-	u32 ip, ip_to, p = 0, port, port_to;
+	u32 ip, ip_to = 0, p = 0, port, port_to;
 	u32 ip2_from = 0, ip2_to, ip2_last, ip2;
 	u32 timeout = h->timeout;
 	bool with_ports = false;
-- 
cgit v1.2.3


From 5e2afba4ecd7931ea06e6fa116ab28e6943dbd42 Mon Sep 17 00:00:00 2001
From: Paul Guo <ggang@tilera.com>
Date: Mon, 14 Nov 2011 19:00:54 +0800
Subject: netfilter: possible unaligned packet header in ip_route_me_harder

This patch tries to fix the following issue in netfilter:
In ip_route_me_harder(), we invoke pskb_expand_head() that
rellocates new header with additional head room which can break
the alignment of the original packet header.

In one of my NAT test case, the NIC port for internal hosts is
configured with vlan and the port for external hosts is with
general configuration. If we ping an external "unknown" hosts from an
internal host, an icmp packet will be sent. We find that in
icmp_send()->...->ip_route_me_harder()->pskb_expand_head(), hh_len=18
and current headroom (skb_headroom(skb)) of the packet is 16. After
calling pskb_expand_head() the packet header becomes to be unaligned
and then our system (arch/tile) panics immediately.

Signed-off-by: Paul Guo <ggang@tilera.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 9899619ab9b8..4f47e064e262 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -64,7 +64,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	/* Change in oif may mean change in hh_len. */
 	hh_len = skb_dst(skb)->dev->hard_header_len;
 	if (skb_headroom(skb) < hh_len &&
-	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+	    pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
+				0, GFP_ATOMIC))
 		return -1;
 
 	return 0;
-- 
cgit v1.2.3


From 904603f9b78f94d5a5fe29cf2f9694ef7f6f4420 Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Thu, 17 Nov 2011 14:53:36 -0800
Subject: mac80211: Fix AMSDU rate printout in debugfs.

It was flipped.  See section 7.3.2.56 of the 802.11n
spec for details.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/debugfs_sta.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index c5f341798c16..3110cbdc501b 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -274,9 +274,9 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
 
 		PRINT_HT_CAP((htc->cap & BIT(10)), "HT Delayed Block Ack");
 
-		PRINT_HT_CAP((htc->cap & BIT(11)), "Max AMSDU length: "
-			     "3839 bytes");
 		PRINT_HT_CAP(!(htc->cap & BIT(11)), "Max AMSDU length: "
+			     "3839 bytes");
+		PRINT_HT_CAP((htc->cap & BIT(11)), "Max AMSDU length: "
 			     "7935 bytes");
 
 		/*
-- 
cgit v1.2.3


From 9c8f2c42c93f415771d6d7b87a0881ba0bb72824 Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Fri, 18 Nov 2011 17:02:16 +0100
Subject: mac80211: Fix endian bug in radiotap header generation

I intoduced this bug in commit a2fe81667410723d941a688e1958a49d67ca3346
"mac80211: Build TX radiotap header dynamically"

Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/status.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index df643cedf9b9..5533a74e9bb3 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -259,7 +259,7 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_radiotap_header *rthdr;
 	unsigned char *pos;
-	__le16 txflags;
+	u16 txflags;
 
 	rthdr = (struct ieee80211_radiotap_header *) skb_push(skb, rtap_len);
 
@@ -289,13 +289,13 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
 	txflags = 0;
 	if (!(info->flags & IEEE80211_TX_STAT_ACK) &&
 	    !is_multicast_ether_addr(hdr->addr1))
-		txflags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL);
+		txflags |= IEEE80211_RADIOTAP_F_TX_FAIL;
 
 	if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) ||
 	    (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT))
-		txflags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS);
+		txflags |= IEEE80211_RADIOTAP_F_TX_CTS;
 	else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
-		txflags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS);
+		txflags |= IEEE80211_RADIOTAP_F_TX_RTS;
 
 	put_unaligned_le16(txflags, pos);
 	pos += 2;
-- 
cgit v1.2.3


From de3584bd62d87b4c250129fbc46ca52c80330add Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 21 Nov 2011 10:44:00 +0100
Subject: cfg80211: fix regulatory NULL dereference

By the time userspace returns with a response to
the regulatory domain request, the wiphy causing
the request might have gone away. If this is so,
reject the update but mark the request as having
been processed anyway.

Cc: Luis R. Rodriguez <lrodriguez@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index bc1ec2c26fd0..186b7f2a27b6 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2035,6 +2035,10 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 	}
 
 	request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
+	if (!request_wiphy) {
+		reg_set_request_processed();
+		return -ENODEV;
+	}
 
 	if (!last_request->intersect) {
 		int r;
-- 
cgit v1.2.3


From 525c6465d449cdec04b3c91733e46027a4d08422 Mon Sep 17 00:00:00 2001
From: "RongQing.Li" <roy.qing.li@gmail.com>
Date: Mon, 21 Nov 2011 16:45:26 -0500
Subject: dccp: fix error propagation in dccp_v4_connect

The errcode is not updated when ip_route_newports() fails.

Signed-off-by: RongQing.Li <roy.qing.li@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 90a919afbed7..3f4e5414c8e5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -111,6 +111,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 			       inet->inet_sport, inet->inet_dport, sk);
 	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
 		rt = NULL;
 		goto failure;
 	}
-- 
cgit v1.2.3


From f23aa62545c18728eb2b9434aa258be27e07dd49 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 21 Nov 2011 16:46:24 -0500
Subject: caif: fix endian conversion in cffrml_transmit()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The "tmp" variable here is used to store the result of cpu_to_le16()
so it should be an __le16 instead of an int.  We want the high bits
set and the current code works on little endian systems but not on
big endian systems.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cffrml.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index f39921171d0d..d3ca87bf23b7 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -136,20 +136,21 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
 
 static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
 {
-	int tmp;
 	u16 chks;
 	u16 len;
+	__le16 data;
+
 	struct cffrml *this = container_obj(layr);
 	if (this->dofcs) {
 		chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff);
-		tmp = cpu_to_le16(chks);
-		cfpkt_add_trail(pkt, &tmp, 2);
+		data = cpu_to_le16(chks);
+		cfpkt_add_trail(pkt, &data, 2);
 	} else {
 		cfpkt_pad_trail(pkt, 2);
 	}
 	len = cfpkt_getlen(pkt);
-	tmp = cpu_to_le16(len);
-	cfpkt_add_head(pkt, &tmp, 2);
+	data = cpu_to_le16(len);
+	cfpkt_add_head(pkt, &data, 2);
 	cfpkt_info(pkt)->hdr_len += 2;
 	if (cfpkt_erroneous(pkt)) {
 		pr_err("Packet is erroneous!\n");
-- 
cgit v1.2.3


From 70e9942f17a6193e9172a804e6569a8806633d6b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 22 Nov 2011 00:16:51 +0100
Subject: netfilter: nf_conntrack: make event callback registration per-netns

This patch fixes an oops that can be triggered following this recipe:

0) make sure nf_conntrack_netlink and nf_conntrack_ipv4 are loaded.
1) container is started.
2) connect to it via lxc-console.
3) generate some traffic with the container to create some conntrack
   entries in its table.
4) stop the container: you hit one oops because the conntrack table
   cleanup tries to report the destroy event to user-space but the
   per-netns nfnetlink socket has already gone (as the nfnetlink
   socket is per-netns but event callback registration is global).

To fix this situation, we make the ctnl_notifier per-netns so the
callback is registered/unregistered if the container is
created/destroyed.

Alex Bligh and Alexey Dobriyan originally proposed one small patch to
check if the nfnetlink socket is gone in nfnetlink_has_listeners,
but this is a very visited path for events, thus, it may reduce
performance and it looks a bit hackish to check for the nfnetlink
socket only to workaround this situation. As a result, I decided
to follow the bigger path choice, which seems to look nicer to me.

Cc: Alexey Dobriyan <adobriyan@gmail.com>
Reported-by: Alex Bligh <alex@alex.org.uk>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_ecache.c  | 37 +++++++++---------
 net/netfilter/nf_conntrack_netlink.c | 73 +++++++++++++++++++++++++-----------
 2 files changed, 70 insertions(+), 40 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 6b368be937c6..b62c4148b921 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -27,22 +27,17 @@
 
 static DEFINE_MUTEX(nf_ct_ecache_mutex);
 
-struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
-
-struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly;
-EXPORT_SYMBOL_GPL(nf_expect_event_cb);
-
 /* deliver cached events and clear cache entry - must be called with locally
  * disabled softirqs */
 void nf_ct_deliver_cached_events(struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	unsigned long events;
 	struct nf_ct_event_notifier *notify;
 	struct nf_conntrack_ecache *e;
 
 	rcu_read_lock();
-	notify = rcu_dereference(nf_conntrack_event_cb);
+	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
 	if (notify == NULL)
 		goto out_unlock;
 
@@ -83,19 +78,20 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
 
-int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
+int nf_conntrack_register_notifier(struct net *net,
+				   struct nf_ct_event_notifier *new)
 {
 	int ret = 0;
 	struct nf_ct_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference_protected(nf_conntrack_event_cb,
+	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
 					   lockdep_is_held(&nf_ct_ecache_mutex));
 	if (notify != NULL) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
-	RCU_INIT_POINTER(nf_conntrack_event_cb, new);
+	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, new);
 	mutex_unlock(&nf_ct_ecache_mutex);
 	return ret;
 
@@ -105,32 +101,34 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
 
-void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
+void nf_conntrack_unregister_notifier(struct net *net,
+				      struct nf_ct_event_notifier *new)
 {
 	struct nf_ct_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference_protected(nf_conntrack_event_cb,
+	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
 					   lockdep_is_held(&nf_ct_ecache_mutex));
 	BUG_ON(notify != new);
-	RCU_INIT_POINTER(nf_conntrack_event_cb, NULL);
+	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 
-int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
+int nf_ct_expect_register_notifier(struct net *net,
+				   struct nf_exp_event_notifier *new)
 {
 	int ret = 0;
 	struct nf_exp_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference_protected(nf_expect_event_cb,
+	notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
 					   lockdep_is_held(&nf_ct_ecache_mutex));
 	if (notify != NULL) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
-	RCU_INIT_POINTER(nf_expect_event_cb, new);
+	RCU_INIT_POINTER(net->ct.nf_expect_event_cb, new);
 	mutex_unlock(&nf_ct_ecache_mutex);
 	return ret;
 
@@ -140,15 +138,16 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
 
-void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
+void nf_ct_expect_unregister_notifier(struct net *net,
+				      struct nf_exp_event_notifier *new)
 {
 	struct nf_exp_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference_protected(nf_expect_event_cb,
+	notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
 					   lockdep_is_held(&nf_ct_ecache_mutex));
 	BUG_ON(notify != new);
-	RCU_INIT_POINTER(nf_expect_event_cb, NULL);
+	RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e58aa9b1fe8a..ef21b221f036 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -4,7 +4,7 @@
  * (C) 2001 by Jay Schulist <jschlst@samba.org>
  * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org>
  * (C) 2003 by Patrick Mchardy <kaber@trash.net>
- * (C) 2005-2008 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2005-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
  *
  * Initial connection tracking via netlink development funded and
  * generally made possible by Network Robots, Inc. (www.networkrobots.com)
@@ -2163,6 +2163,54 @@ MODULE_ALIAS("ip_conntrack_netlink");
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
 
+static int __net_init ctnetlink_net_init(struct net *net)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	int ret;
+
+	ret = nf_conntrack_register_notifier(net, &ctnl_notifier);
+	if (ret < 0) {
+		pr_err("ctnetlink_init: cannot register notifier.\n");
+		goto err_out;
+	}
+
+	ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp);
+	if (ret < 0) {
+		pr_err("ctnetlink_init: cannot expect register notifier.\n");
+		goto err_unreg_notifier;
+	}
+#endif
+	return 0;
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+err_unreg_notifier:
+	nf_conntrack_unregister_notifier(net, &ctnl_notifier);
+err_out:
+	return ret;
+#endif
+}
+
+static void ctnetlink_net_exit(struct net *net)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp);
+	nf_conntrack_unregister_notifier(net, &ctnl_notifier);
+#endif
+}
+
+static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
+{
+	struct net *net;
+
+	list_for_each_entry(net, net_exit_list, exit_list)
+		ctnetlink_net_exit(net);
+}
+
+static struct pernet_operations ctnetlink_net_ops = {
+	.init		= ctnetlink_net_init,
+	.exit_batch	= ctnetlink_net_exit_batch,
+};
+
 static int __init ctnetlink_init(void)
 {
 	int ret;
@@ -2180,28 +2228,15 @@ static int __init ctnetlink_init(void)
 		goto err_unreg_subsys;
 	}
 
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-	ret = nf_conntrack_register_notifier(&ctnl_notifier);
-	if (ret < 0) {
-		pr_err("ctnetlink_init: cannot register notifier.\n");
+	if (register_pernet_subsys(&ctnetlink_net_ops)) {
+		pr_err("ctnetlink_init: cannot register pernet operations\n");
 		goto err_unreg_exp_subsys;
 	}
 
-	ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp);
-	if (ret < 0) {
-		pr_err("ctnetlink_init: cannot expect register notifier.\n");
-		goto err_unreg_notifier;
-	}
-#endif
-
 	return 0;
 
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-err_unreg_notifier:
-	nf_conntrack_unregister_notifier(&ctnl_notifier);
 err_unreg_exp_subsys:
 	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
-#endif
 err_unreg_subsys:
 	nfnetlink_subsys_unregister(&ctnl_subsys);
 err_out:
@@ -2213,11 +2248,7 @@ static void __exit ctnetlink_exit(void)
 	pr_info("ctnetlink: unregistering from nfnetlink.\n");
 
 	nf_ct_remove_userspace_expectations();
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-	nf_ct_expect_unregister_notifier(&ctnl_notifier_exp);
-	nf_conntrack_unregister_notifier(&ctnl_notifier);
-#endif
-
+	unregister_pernet_subsys(&ctnetlink_net_ops);
 	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
 	nfnetlink_subsys_unregister(&ctnl_subsys);
 }
-- 
cgit v1.2.3


From 717b6d83664646963c71d014c71babaa802333b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Tue, 22 Nov 2011 16:03:10 -0500
Subject: net-netlink: fix diag to export IPv4 tos for dual-stack IPv6 sockets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 68e8ac514383..ccee270a9b65 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -108,9 +108,6 @@ static int inet_csk_diag_fill(struct sock *sk,
 		       icsk->icsk_ca_ops->name);
 	}
 
-	if ((ext & (1 << (INET_DIAG_TOS - 1))) && (sk->sk_family != AF_INET6))
-		RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
-
 	r->idiag_family = sk->sk_family;
 	r->idiag_state = sk->sk_state;
 	r->idiag_timer = 0;
@@ -125,16 +122,23 @@ static int inet_csk_diag_fill(struct sock *sk,
 	r->id.idiag_src[0] = inet->inet_rcv_saddr;
 	r->id.idiag_dst[0] = inet->inet_daddr;
 
+	/* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
+	 * hence this needs to be included regardless of socket family.
+	 */
+	if (ext & (1 << (INET_DIAG_TOS - 1)))
+		RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
+
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 	if (r->idiag_family == AF_INET6) {
 		const struct ipv6_pinfo *np = inet6_sk(sk);
 
+		if (ext & (1 << (INET_DIAG_TCLASS - 1)))
+			RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass);
+
 		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
 			       &np->rcv_saddr);
 		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
 			       &np->daddr);
-		if (ext & (1 << (INET_DIAG_TCLASS - 1)))
-			RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass);
 	}
 #endif
 
-- 
cgit v1.2.3


From 20e994a05b33b186a22a3b9e922df4cce644daac Mon Sep 17 00:00:00 2001
From: Feng King <kinwin2008@gmail.com>
Date: Mon, 21 Nov 2011 01:47:11 +0000
Subject: net: correct comments of skb_shift

when skb_shift, we want to shift paged data from skb to tgt frag area.
Original comments revert the shift order

Signed-off-by: Feng King <kinwin2008@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 18a3cebb753d..3c30ee4a5710 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2230,7 +2230,7 @@ static int skb_prepare_for_shift(struct sk_buff *skb)
  * @shiftlen: shift up to this many bytes
  *
  * Attempts to shift up to shiftlen worth of bytes, which may be less than
- * the length of the skb, from tgt to skb. Returns number bytes shifted.
+ * the length of the skb, from skb to tgt. Returns number bytes shifted.
  * It's up to caller to free skb if everything was shifted.
  *
  * If @tgt runs out of frags, the whole operation is aborted.
-- 
cgit v1.2.3


From 4d65a2465f6f2694de67777a8aedb1272f473979 Mon Sep 17 00:00:00 2001
From: Li Wei <lw@cn.fujitsu.com>
Date: Wed, 23 Nov 2011 03:51:54 -0500
Subject: ipv6: fix a bug in ndisc_send_redirect

Release skb when transmit rate limit _not_ allow

Signed-off-by: Li Wei <lw@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ndisc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 44e5b7f2a6c1..0cb78d7ddaf5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1571,7 +1571,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	}
 	if (!rt->rt6i_peer)
 		rt6_bind_peer(rt, 1);
-	if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
+	if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
 		goto release;
 
 	if (dev->addr_len) {
-- 
cgit v1.2.3


From c16a98ed91597b40b22b540c6517103497ef8e74 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 23 Nov 2011 15:49:31 -0500
Subject: ipv6: tcp: fix panic in SYN processing

commit 72a3effaf633bc ([NET]: Size listen hash tables using backlog
hint) added a bug allowing inet6_synq_hash() to return an out of bound
array index, because of u16 overflow.

Bug can happen if system admins set net.core.somaxconn &
net.ipv4.tcp_max_syn_backlog sysctls to values greater than 65536

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/inet6_connection_sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index fee46d5a2f12..1567fb120392 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -85,7 +85,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
  * request_sock (formerly open request) hash tables.
  */
 static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
-			   const u32 rnd, const u16 synq_hsize)
+			   const u32 rnd, const u32 synq_hsize)
 {
 	u32 c;
 
-- 
cgit v1.2.3


From 46a246c4dff9f248913e791b69f2336cd8d4ec41 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 23 Nov 2011 16:07:00 -0500
Subject: netfilter: Remove NOTRACK/RAW dependency on NETFILTER_ADVANCED.

Distributions are using this in their default scripts, so don't hide
them behind the advanced setting.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig | 1 -
 net/ipv6/netfilter/Kconfig | 1 -
 net/netfilter/Kconfig      | 1 -
 3 files changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1dfc18a03fd4..f19f2182894c 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -325,7 +325,6 @@ config IP_NF_TARGET_TTL
 # raw + specific targets
 config IP_NF_RAW
 	tristate  'raw table support (required for NOTRACK/TRACE)'
-	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `raw' table to iptables. This table is the very
 	  first in the netfilter framework and hooks in at the PREROUTING
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 448464844a25..f792b34cbe9c 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -186,7 +186,6 @@ config IP6_NF_MANGLE
 
 config IP6_NF_RAW
 	tristate  'raw table support (required for TRACE)'
-	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `raw' table to ip6tables. This table is the very
 	  first in the netfilter framework and hooks in at the PREROUTING
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8260b13d93c9..e8f379692294 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -542,7 +542,6 @@ config NETFILTER_XT_TARGET_NOTRACK
 	tristate  '"NOTRACK" target support'
 	depends on IP_NF_RAW || IP6_NF_RAW
 	depends on NF_CONNTRACK
-	depends on NETFILTER_ADVANCED
 	help
 	  The NOTRACK target allows a select rule to specify
 	  which packets *not* to enter the conntrack/NAT
-- 
cgit v1.2.3


From 4d0fe50c75a547088e4304e5eb5f521514dfae46 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 23 Nov 2011 17:29:23 -0500
Subject: ipv6: tcp: fix tcp_v6_conn_request()

Since linux 2.6.26 (commit c6aefafb7ec6 : Add IPv6 support to TCP SYN
cookies), we can drop a SYN packet reusing a TIME_WAIT socket.

(As a matter of fact we fail to send the SYNACK answer)

As the client resends its SYN packet after a one second timeout, we
accept it, because first packet removed the TIME_WAIT socket before
being dropped.

This probably explains why nobody ever noticed or complained.

Reported-by: Jesse Young <jlyo@jlyo.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 36131d122a6f..2dea4bb7b54a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1255,6 +1255,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
+	treq->iif = sk->sk_bound_dev_if;
+
+	/* So that link locals have meaning */
+	if (!sk->sk_bound_dev_if &&
+	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		treq->iif = inet6_iif(skb);
+
 	if (!isn) {
 		struct inet_peer *peer = NULL;
 
@@ -1264,12 +1271,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 			atomic_inc(&skb->users);
 			treq->pktopts = skb;
 		}
-		treq->iif = sk->sk_bound_dev_if;
-
-		/* So that link locals have meaning */
-		if (!sk->sk_bound_dev_if &&
-		    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
-			treq->iif = inet6_iif(skb);
 
 		if (want_cookie) {
 			isn = cookie_v6_init_sequence(sk, skb, &req->mss);
-- 
cgit v1.2.3


From 685f94e6db8496399c881218018166515445a914 Mon Sep 17 00:00:00 2001
From: Jun Zhao <mypopydev@gmail.com>
Date: Tue, 22 Nov 2011 17:19:03 +0000
Subject: ipv4 : igmp : fix error handle in ip_mc_add_src()

When add sources to interface failure, need to roll back the sfcount[MODE]
to before state. We need to match it corresponding.

Acked-by: David L Stevens <dlstevens@us.ibm.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Jun Zhao <mypopydev@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c7472eff2d51..b2ca095cb9da 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1716,7 +1716,8 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 	if (err) {
 		int j;
 
-		pmc->sfcount[sfmode]--;
+		if (!delta)
+			pmc->sfcount[sfmode]--;
 		for (j=0; j<i; j++)
 			(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]);
 	} else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
-- 
cgit v1.2.3


From ac8a48106be49c422575ddc7531b776f8eb49610 Mon Sep 17 00:00:00 2001
From: Li Wei <lw@cn.fujitsu.com>
Date: Tue, 22 Nov 2011 23:33:10 +0000
Subject: ipv4: Save nexthop address of LSRR/SSRR option to IPCB.

We can not update iph->daddr in ip_options_rcv_srr(), It is too early.
When some exception ocurred later (eg. in ip_forward() when goto
sr_failed) we need the ip header be identical to the original one as
ICMP need it.

Add a field 'nexthop' in struct ip_options to save nexthop of LSRR
or SSRR option.

Signed-off-by: Li Wei <lw@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_forward.c | 2 +-
 net/ipv4/ip_options.c | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 3b34d1c86270..29a07b6c7168 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 
-	if (opt->is_strictroute && ip_hdr(skb)->daddr != rt->rt_gateway)
+	if (opt->is_strictroute && opt->nexthop != rt->rt_gateway)
 		goto sr_failed;
 
 	if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 05d20cca9d66..1e60f7679075 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -568,12 +568,13 @@ void ip_forward_options(struct sk_buff *skb)
 		     ) {
 			if (srrptr + 3 > srrspace)
 				break;
-			if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0)
+			if (memcmp(&opt->nexthop, &optptr[srrptr-1], 4) == 0)
 				break;
 		}
 		if (srrptr + 3 <= srrspace) {
 			opt->is_changed = 1;
 			ip_rt_get_source(&optptr[srrptr-1], skb, rt);
+			ip_hdr(skb)->daddr = opt->nexthop;
 			optptr[2] = srrptr+4;
 		} else if (net_ratelimit())
 			printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
@@ -640,7 +641,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	}
 	if (srrptr <= srrspace) {
 		opt->srr_is_hit = 1;
-		iph->daddr = nexthop;
+		opt->nexthop = nexthop;
 		opt->is_changed = 1;
 	}
 	return 0;
-- 
cgit v1.2.3


From 42ca0203fd59aa9be7b241be1fbc3bef1f903f9c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 23 Nov 2011 21:18:20 +0000
Subject: net/netlabel: copy and paste bug in netlbl_cfg_unlbl_map_add()

This was copy and pasted from the IPv4 code.  We're calling the
ip4 version of that function and map4 is NULL.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_kapi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 9c24de10a657..7b372a763c60 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -162,8 +162,8 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
 			map6->list.addr.s6_addr32[3] &= mask6->s6_addr32[3];
 			ipv6_addr_copy(&map6->list.mask, mask6);
 			map6->list.valid = 1;
-			ret_val = netlbl_af4list_add(&map4->list,
-						     &addrmap->list4);
+			ret_val = netlbl_af6list_add(&map6->list,
+						     &addrmap->list6);
 			if (ret_val != 0)
 				goto cfg_unlbl_map_add_failure;
 			break;
-- 
cgit v1.2.3


From df07a94cf50eb73d09bf2350c3fe2598e4cbeee1 Mon Sep 17 00:00:00 2001
From: "Jorge Boncompte [DTI2]" <jorge@dti2.net>
Date: Fri, 25 Nov 2011 13:24:49 -0500
Subject: netns: fix proxy ARP entries listing on a netns

Skip entries from foreign network namespaces.

Signed-off-by: Jorge Boncompte [DTI2] <jorge@dti2.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 039d51e6c284..5ac07d31fbc9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2397,7 +2397,10 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct neigh_table *tbl = state->tbl;
 
-	pn = pn->next;
+	do {
+		pn = pn->next;
+	} while (pn && !net_eq(pneigh_net(pn), net));
+
 	while (!pn) {
 		if (++state->bucket > PNEIGH_HASHMASK)
 			break;
-- 
cgit v1.2.3


From 6b600b26c0215bf9ed04062ecfacf0bc20e2588c Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 23 Nov 2011 02:12:13 +0000
Subject: route: Use the device mtu as the default for blackhole routes

As it is, we return null as the default mtu of blackhole routes.
This may lead to a propagation of a bogus pmtu if the default_mtu
method of a blackhole route is invoked. So return dst->dev->mtu
as the default mtu instead.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 net/ipv6/route.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0c74da8a0473..5b17bf124a33 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2757,7 +2757,7 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo
 
 static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst)
 {
-	return 0;
+	return dst->dev->mtu;
 }
 
 static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8473016bba4a..d8fbd18c9467 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -157,7 +157,7 @@ static struct dst_ops ip6_dst_ops_template = {
 
 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
 {
-	return 0;
+	return dst->dev->mtu;
 }
 
 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
-- 
cgit v1.2.3


From ebb762f27fed083cb993a0816393aba4615f6544 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 23 Nov 2011 02:12:51 +0000
Subject: net: Rename the dst_opt default_mtu method to mtu

We plan to invoke the dst_opt->default_mtu() method unconditioally
from dst_mtu(). So rename the method to dst_opt->mtu() to match
the name with the new meaning.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_route.c  |  6 +++---
 net/ipv4/route.c       | 10 +++++-----
 net/ipv6/route.c       | 10 +++++-----
 net/xfrm/xfrm_policy.c |  6 +++---
 4 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index a77d16158eb6..db4867963247 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -112,7 +112,7 @@ static unsigned long dn_rt_deadline;
 static int dn_dst_gc(struct dst_ops *ops);
 static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
 static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
-static unsigned int dn_dst_default_mtu(const struct dst_entry *dst);
+static unsigned int dn_dst_mtu(const struct dst_entry *dst);
 static void dn_dst_destroy(struct dst_entry *);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
@@ -135,7 +135,7 @@ static struct dst_ops dn_dst_ops = {
 	.gc =			dn_dst_gc,
 	.check =		dn_dst_check,
 	.default_advmss =	dn_dst_default_advmss,
-	.default_mtu =		dn_dst_default_mtu,
+	.mtu =			dn_dst_mtu,
 	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		dn_dst_destroy,
 	.negative_advice =	dn_dst_negative_advice,
@@ -825,7 +825,7 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
 	return dn_mss_from_pmtu(dst->dev, dst_mtu(dst));
 }
 
-static unsigned int dn_dst_default_mtu(const struct dst_entry *dst)
+static unsigned int dn_dst_mtu(const struct dst_entry *dst)
 {
 	return dst->dev->mtu;
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5b17bf124a33..f1ac3efc5524 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -138,7 +138,7 @@ static int rt_chain_length_max __read_mostly	= 20;
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
-static unsigned int	 ipv4_default_mtu(const struct dst_entry *dst);
+static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
 static void		 ipv4_dst_destroy(struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
@@ -193,7 +193,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.gc =			rt_garbage_collect,
 	.check =		ipv4_dst_check,
 	.default_advmss =	ipv4_default_advmss,
-	.default_mtu =		ipv4_default_mtu,
+	.mtu =			ipv4_mtu,
 	.cow_metrics =		ipv4_cow_metrics,
 	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
@@ -1814,7 +1814,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 	return advmss;
 }
 
-static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
+static unsigned int ipv4_mtu(const struct dst_entry *dst)
 {
 	unsigned int mtu = dst->dev->mtu;
 
@@ -2755,7 +2755,7 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo
 	return NULL;
 }
 
-static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst)
+static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
 {
 	return dst->dev->mtu;
 }
@@ -2775,7 +2775,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.protocol		=	cpu_to_be16(ETH_P_IP),
 	.destroy		=	ipv4_dst_destroy,
 	.check			=	ipv4_blackhole_dst_check,
-	.default_mtu		=	ipv4_blackhole_default_mtu,
+	.mtu			=	ipv4_blackhole_mtu,
 	.default_advmss		=	ipv4_default_advmss,
 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
 	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d8fbd18c9467..76645d7077ff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -77,7 +77,7 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
 				    const struct in6_addr *dest);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
-static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
+static unsigned int	 ip6_mtu(const struct dst_entry *dst);
 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static void		ip6_dst_destroy(struct dst_entry *);
 static void		ip6_dst_ifdown(struct dst_entry *,
@@ -144,7 +144,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.gc_thresh		=	1024,
 	.check			=	ip6_dst_check,
 	.default_advmss		=	ip6_default_advmss,
-	.default_mtu		=	ip6_default_mtu,
+	.mtu			=	ip6_mtu,
 	.cow_metrics		=	ipv6_cow_metrics,
 	.destroy		=	ip6_dst_destroy,
 	.ifdown			=	ip6_dst_ifdown,
@@ -155,7 +155,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.neigh_lookup		=	ip6_neigh_lookup,
 };
 
-static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
+static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 {
 	return dst->dev->mtu;
 }
@@ -175,7 +175,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 	.destroy		=	ip6_dst_destroy,
 	.check			=	ip6_dst_check,
-	.default_mtu		=	ip6_blackhole_default_mtu,
+	.mtu			=	ip6_blackhole_mtu,
 	.default_advmss		=	ip6_default_advmss,
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
@@ -1041,7 +1041,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 	return mtu;
 }
 
-static unsigned int ip6_default_mtu(const struct dst_entry *dst)
+static unsigned int ip6_mtu(const struct dst_entry *dst)
 {
 	unsigned int mtu = IPV6_MIN_MTU;
 	struct inet6_dev *idev;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 552df27dcf53..b8be51eb7e29 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2382,7 +2382,7 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
 	return dst_metric_advmss(dst->path);
 }
 
-static unsigned int xfrm_default_mtu(const struct dst_entry *dst)
+static unsigned int xfrm_mtu(const struct dst_entry *dst)
 {
 	return dst_mtu(dst->path);
 }
@@ -2411,8 +2411,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 			dst_ops->check = xfrm_dst_check;
 		if (likely(dst_ops->default_advmss == NULL))
 			dst_ops->default_advmss = xfrm_default_advmss;
-		if (likely(dst_ops->default_mtu == NULL))
-			dst_ops->default_mtu = xfrm_default_mtu;
+		if (likely(dst_ops->mtu == NULL))
+			dst_ops->mtu = xfrm_mtu;
 		if (likely(dst_ops->negative_advice == NULL))
 			dst_ops->negative_advice = xfrm_negative_advice;
 		if (likely(dst_ops->link_failure == NULL))
-- 
cgit v1.2.3


From 618f9bc74a039da76fa027ac2600c5b785b964c5 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 23 Nov 2011 02:13:31 +0000
Subject: net: Move mtu handling down to the protocol depended handlers

We move all mtu handling from dst_mtu() down to the protocol
layer. So each protocol can implement the mtu handling in
a different manner.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_route.c  |  4 +++-
 net/ipv4/route.c       | 11 +++++++++--
 net/ipv6/route.c       | 11 +++++++++--
 net/xfrm/xfrm_policy.c |  4 +++-
 4 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index db4867963247..94f4ec036669 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -827,7 +827,9 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
 
 static unsigned int dn_dst_mtu(const struct dst_entry *dst)
 {
-	return dst->dev->mtu;
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	return mtu ? : dst->dev->mtu;
 }
 
 static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f1ac3efc5524..11d1b2080a16 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1816,7 +1816,12 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 
 static unsigned int ipv4_mtu(const struct dst_entry *dst)
 {
-	unsigned int mtu = dst->dev->mtu;
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	if (mtu)
+		return mtu;
+
+	mtu = dst->dev->mtu;
 
 	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
 		const struct rtable *rt = (const struct rtable *) dst;
@@ -2757,7 +2762,9 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo
 
 static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
 {
-	return dst->dev->mtu;
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	return mtu ? : dst->dev->mtu;
 }
 
 static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 76645d7077ff..3399dd326287 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -157,7 +157,9 @@ static struct dst_ops ip6_dst_ops_template = {
 
 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 {
-	return dst->dev->mtu;
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	return mtu ? : dst->dev->mtu;
 }
 
 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -1043,8 +1045,13 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 
 static unsigned int ip6_mtu(const struct dst_entry *dst)
 {
-	unsigned int mtu = IPV6_MIN_MTU;
 	struct inet6_dev *idev;
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	if (mtu)
+		return mtu;
+
+	mtu = IPV6_MIN_MTU;
 
 	rcu_read_lock();
 	idev = __in6_dev_get(dst->dev);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b8be51eb7e29..2118d6446630 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2384,7 +2384,9 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
 
 static unsigned int xfrm_mtu(const struct dst_entry *dst)
 {
-	return dst_mtu(dst->path);
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	return mtu ? : dst_mtu(dst->path);
 }
 
 static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr)
-- 
cgit v1.2.3


From 261663b0ee2ee8e3947f4c11c1a08be18cd2cea1 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 23 Nov 2011 02:14:50 +0000
Subject: ipv4: Don't use the cached pmtu informations for input routes

The pmtu informations on the inetpeer are visible for output and
input routes. On packet forwarding, we might propagate a learned
pmtu to the sender. As we update the pmtu informations of the
inetpeer on demand, the original sender of the forwarded packets
might never notice when the pmtu to that inetpeer increases.
So use the mtu of the outgoing device on packet forwarding instead
of the pmtu to the final destination.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 11d1b2080a16..fb47c8f0cd86 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1816,15 +1816,15 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 
 static unsigned int ipv4_mtu(const struct dst_entry *dst)
 {
+	const struct rtable *rt = (const struct rtable *) dst;
 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 
-	if (mtu)
+	if (mtu && rt_is_output_route(rt))
 		return mtu;
 
 	mtu = dst->dev->mtu;
 
 	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
-		const struct rtable *rt = (const struct rtable *) dst;
 
 		if (rt->rt_gateway != rt->rt_dst && mtu > 576)
 			mtu = 576;
-- 
cgit v1.2.3


From 8a6e77d5209e459a9ec5c268c39800c06cd1dc86 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 21 Nov 2011 00:21:55 +0000
Subject: decnet: proper socket refcounting

Better use sk_reset_timer() / sk_stop_timer() helpers to make sure we
dont access already freed/reused memory later.

Reported-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_timer.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
index 67f691bd4acf..d9c150cc59a9 100644
--- a/net/decnet/dn_timer.c
+++ b/net/decnet/dn_timer.c
@@ -36,16 +36,13 @@ static void dn_slow_timer(unsigned long arg);
 
 void dn_start_slow_timer(struct sock *sk)
 {
-	sk->sk_timer.expires	= jiffies + SLOW_INTERVAL;
-	sk->sk_timer.function	= dn_slow_timer;
-	sk->sk_timer.data	= (unsigned long)sk;
-
-	add_timer(&sk->sk_timer);
+	setup_timer(&sk->sk_timer, dn_slow_timer, (unsigned long)sk);
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
 }
 
 void dn_stop_slow_timer(struct sock *sk)
 {
-	del_timer(&sk->sk_timer);
+	sk_stop_timer(sk, &sk->sk_timer);
 }
 
 static void dn_slow_timer(unsigned long arg)
@@ -53,12 +50,10 @@ static void dn_slow_timer(unsigned long arg)
 	struct sock *sk = (struct sock *)arg;
 	struct dn_scp *scp = DN_SK(sk);
 
-	sock_hold(sk);
 	bh_lock_sock(sk);
 
 	if (sock_owned_by_user(sk)) {
-		sk->sk_timer.expires = jiffies + HZ / 10;
-		add_timer(&sk->sk_timer);
+		sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 10);
 		goto out;
 	}
 
@@ -100,9 +95,7 @@ static void dn_slow_timer(unsigned long arg)
 			scp->keepalive_fxn(sk);
 	}
 
-	sk->sk_timer.expires = jiffies + SLOW_INTERVAL;
-
-	add_timer(&sk->sk_timer);
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
-- 
cgit v1.2.3


From 71b1391a41289735676be02e35239e5aa9fe6ba6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 25 Nov 2011 06:47:16 +0000
Subject: l2tp: ensure sk->dst is still valid

When using l2tp over ipsec, the tunnel will hang when rekeying
occurs. Reason is that the transformer bundle attached to the dst entry
is now in STATE_DEAD and thus xfrm_output_one() drops all packets
(XfrmOutStateExpired increases).

Fix this by calling __sk_dst_check (which drops the stale dst
if xfrm dst->check callback finds that the bundle is no longer valid).

Cc: James Chapman <jchapman@katalix.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index cf0f308abf5e..89ff8c67943e 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1072,7 +1072,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 
 	/* Get routing info from the tunnel socket */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, dst_clone(__sk_dst_get(sk)));
+	skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0)));
 
 	inet = inet_sk(sk);
 	fl = &inet->cork.fl;
-- 
cgit v1.2.3


From 0884d7aa24e15e72b3c07f7da910a13bb7df3592 Mon Sep 17 00:00:00 2001
From: Alexey Moiseytsev <himeraster@gmail.com>
Date: Mon, 21 Nov 2011 13:35:25 +0000
Subject: AF_UNIX: Fix poll blocking problem when reading from a stream socket

poll() call may be blocked by concurrent reading from the same stream
socket.

Signed-off-by: Alexey Moiseytsev <himeraster@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 466fbcc5cf77..b595a3d8679f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1957,6 +1957,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
 			    (UNIXCB(skb).cred != siocb->scm->cred)) {
 				skb_queue_head(&sk->sk_receive_queue, skb);
+				sk->sk_data_ready(sk, skb->len);
 				break;
 			}
 		} else {
@@ -1974,6 +1975,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		chunk = min_t(unsigned int, skb->len, size);
 		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
 			skb_queue_head(&sk->sk_receive_queue, skb);
+			sk->sk_data_ready(sk, skb->len);
 			if (copied == 0)
 				copied = -EFAULT;
 			break;
@@ -1991,6 +1993,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 			/* put the skb back if we didn't use it up.. */
 			if (skb->len) {
 				skb_queue_head(&sk->sk_receive_queue, skb);
+				sk->sk_data_ready(sk, skb->len);
 				break;
 			}
 
@@ -2006,6 +2009,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 			/* put message back and return */
 			skb_queue_head(&sk->sk_receive_queue, skb);
+			sk->sk_data_ready(sk, skb->len);
 			break;
 		}
 	} while (size);
-- 
cgit v1.2.3


From de68dca1816660b0d3ac89fa59ffb410007a143f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 26 Nov 2011 12:13:44 +0000
Subject: inet: add a redirect generation id in inetpeer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now inetpeer is the place where we cache redirect information for ipv4
destinations, we must be able to invalidate informations when a route is
added/removed on host.

As inetpeer is not yet namespace aware, this patch adds a shared
redirect_genid, and a per inetpeer redirect_genid. This might be changed
later if inetpeer becomes ns aware.

Cache information for one inerpeer is valid as long as its
redirect_genid has the same value than global redirect_genid.

Reported-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Tested-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fb47c8f0cd86..5c2847247f51 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -131,6 +131,7 @@ static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
 static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 static int rt_chain_length_max __read_mostly	= 20;
+static int redirect_genid;
 
 /*
  *	Interface to generic destination cache.
@@ -837,6 +838,7 @@ static void rt_cache_invalidate(struct net *net)
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
+	redirect_genid++;
 }
 
 /*
@@ -1391,8 +1393,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 				peer = rt->peer;
 				if (peer) {
-					if (peer->redirect_learned.a4 != new_gw) {
+					if (peer->redirect_learned.a4 != new_gw ||
+					    peer->redirect_genid != redirect_genid) {
 						peer->redirect_learned.a4 = new_gw;
+						peer->redirect_genid = redirect_genid;
 						atomic_inc(&__rt_peer_genid);
 					}
 					check_peer_redir(&rt->dst, peer);
@@ -1701,6 +1705,8 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 		if (peer) {
 			check_peer_pmtu(dst, peer);
 
+			if (peer->redirect_genid != redirect_genid)
+				peer->redirect_learned.a4 = 0;
 			if (peer->redirect_learned.a4 &&
 			    peer->redirect_learned.a4 != rt->rt_gateway) {
 				if (check_peer_redir(dst, peer))
@@ -1857,6 +1863,8 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 		dst_init_metrics(&rt->dst, peer->metrics, false);
 
 		check_peer_pmtu(&rt->dst, peer);
+		if (peer->redirect_genid != redirect_genid)
+			peer->redirect_learned.a4 = 0;
 		if (peer->redirect_learned.a4 &&
 		    peer->redirect_learned.a4 != rt->rt_gateway) {
 			rt->rt_gateway = peer->redirect_learned.a4;
-- 
cgit v1.2.3


From e007b857e88097c96c45620bf3b04a4e309053d1 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Thu, 24 Nov 2011 18:13:56 +0200
Subject: nl80211: fix MAC address validation

MAC addresses have a fixed length. The current
policy allows passing < ETH_ALEN bytes, which
might result in reading beyond the buffer.

Cc: stable@vger.kernel.org
Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/nl80211.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b3a476fe8272..ffafda5022c2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -89,8 +89,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 },
 
-	[NL80211_ATTR_MAC] = { .type = NLA_BINARY, .len = ETH_ALEN },
-	[NL80211_ATTR_PREV_BSSID] = { .type = NLA_BINARY, .len = ETH_ALEN },
+	[NL80211_ATTR_MAC] = { .len = ETH_ALEN },
+	[NL80211_ATTR_PREV_BSSID] = { .len = ETH_ALEN },
 
 	[NL80211_ATTR_KEY] = { .type = NLA_NESTED, },
 	[NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY,
-- 
cgit v1.2.3


From 24f50a9d165745fd0701c6e089d35f58a229ea69 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 24 Nov 2011 20:06:14 +0100
Subject: mac80211: don't stop a single aggregation session twice

Nikolay noticed (by code review) that mac80211 can
attempt to stop an aggregation session while it is
already being stopped. So to fix it, check whether
stop is already being done and bail out if so.

Also move setting the STOPPING state into the lock
so things are properly atomic.

Cc: stable@vger.kernel.org
Reported-by: Nikolay Martynov <mar.kolya@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/agg-tx.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 2ac033989e01..674b345ade81 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -160,6 +160,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 		return -ENOENT;
 	}
 
+	/* if we're already stopping ignore any new requests to stop */
+	if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
+		spin_unlock_bh(&sta->lock);
+		return -EALREADY;
+	}
+
 	if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) {
 		/* not even started yet! */
 		ieee80211_assign_tid_tx(sta, tid, NULL);
@@ -168,6 +174,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 		return 0;
 	}
 
+	set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state);
+
 	spin_unlock_bh(&sta->lock);
 
 #ifdef CONFIG_MAC80211_HT_DEBUG
@@ -175,8 +183,6 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 	       sta->sta.addr, tid);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
-	set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state);
-
 	del_timer_sync(&tid_tx->addba_resp_timer);
 
 	/*
-- 
cgit v1.2.3


From d305a6557b2c4dca0110f05ffe745b1ef94adb80 Mon Sep 17 00:00:00 2001
From: Nikolay Martynov <mar.kolya@gmail.com>
Date: Mon, 28 Nov 2011 09:18:00 +0100
Subject: mac80211: fix race condition caused by late addBA response

If addBA responses comes in just after addba_resp_timer has
expired mac80211 will still accept it and try to open the
aggregation session. This causes drivers to be confused and
in some cases even crash.

This patch fixes the race condition and makes sure that if
addba_resp_timer has expired addBA response is not longer
accepted and we do not try to open half-closed session.

Cc: stable@vger.kernel.org
Signed-off-by: Nikolay Martynov <mar.kolya@gmail.com>
[some adjustments]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/agg-tx.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 674b345ade81..eea6e5c8d168 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -762,11 +762,27 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 		goto out;
 	}
 
-	del_timer(&tid_tx->addba_resp_timer);
+	del_timer_sync(&tid_tx->addba_resp_timer);
 
 #ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid);
 #endif
+
+	/*
+	 * addba_resp_timer may have fired before we got here, and
+	 * caused WANT_STOP to be set. If the stop then was already
+	 * processed further, STOPPING might be set.
+	 */
+	if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) ||
+	    test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG
+		       "got addBA resp for tid %d but we already gave up\n",
+		       tid);
+#endif
+		goto out;
+	}
+
 	/*
 	 * IEEE 802.11-2007 7.3.1.14:
 	 * In an ADDBA Response frame, when the Status Code field
-- 
cgit v1.2.3


From 2a1e0fd175dcfd72096ba9291d31e3b1b5342e60 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sun, 27 Nov 2011 15:29:44 +0200
Subject: mac80211: fix race between the AGG SM and the Tx data path

When a packet is supposed to sent be as an a-MPDU, mac80211 sets
IEEE80211_TX_CTL_AMPDU to let the driver know. On the other
hand, mac80211 configures the driver for aggregration with the
ampdu_action callback.
There is race between these two mechanisms since the following
scenario can occur when the BA agreement is torn down:

Tx softIRQ	 			drv configuration
==========				=================

check OPERATIONAL bit
Set the TX_CTL_AMPDU bit in the packet

					clear OPERATIONAL bit
					stop Tx AGG
Pass Tx packet to the driver.

In that case the driver would get a packet with TX_CTL_AMPDU set
although it has already been notified that the BA session has been
torn down.

To fix this, we need to synchronize all the Qdisc activity after we
cleared the OPERATIONAL bit. After that step, all the following
packets will be buffered until the driver reports it is ready to get
new packets for this RA / TID. This buffering allows not to run into
another race that would send packets with TX_CTL_AMPDU unset while
the driver hasn't been requested to tear down the BA session yet.

This race occurs in practice and iwlwifi complains with a WARN_ON
when it happens.

Cc: stable@kernel.org
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/agg-tx.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index eea6e5c8d168..331472ce038c 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -192,6 +192,20 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 	 */
 	clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state);
 
+	/*
+	 * There might be a few packets being processed right now (on
+	 * another CPU) that have already gotten past the aggregation
+	 * check when it was still OPERATIONAL and consequently have
+	 * IEEE80211_TX_CTL_AMPDU set. In that case, this code might
+	 * call into the driver at the same time or even before the
+	 * TX paths calls into it, which could confuse the driver.
+	 *
+	 * Wait for all currently running TX paths to finish before
+	 * telling the driver. New packets will not go through since
+	 * the aggregation session is no longer OPERATIONAL.
+	 */
+	synchronize_net();
+
 	tid_tx->stop_initiator = initiator;
 	tid_tx->tx_stop = tx;
 
-- 
cgit v1.2.3


From 5cac98dd06bc43a7baab3523184f70fd359e9f35 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 27 Nov 2011 21:14:46 +0000
Subject: net: Fix corruption in /proc/*/net/dev_mcast

I just hit this during my testing. Isn't there another bug lurking?

BUG kmalloc-8: Redzone overwritten

INFO: 0xc0000000de9dec48-0xc0000000de9dec4b. First byte 0x0 instead of 0xcc
INFO: Allocated in .__seq_open_private+0x30/0xa0 age=0 cpu=5 pid=3896
	.__kmalloc+0x1e0/0x2d0
	.__seq_open_private+0x30/0xa0
	.seq_open_net+0x60/0xe0
	.dev_mc_seq_open+0x4c/0x70
	.proc_reg_open+0xd8/0x260
	.__dentry_open.clone.11+0x2b8/0x400
	.do_last+0xf4/0x950
	.path_openat+0xf8/0x480
	.do_filp_open+0x48/0xc0
	.do_sys_open+0x140/0x250
	syscall_exit+0x0/0x40

dev_mc_seq_ops uses dev_seq_start/next/stop but only allocates
sizeof(struct seq_net_private) of private data, whereas it expects
sizeof(struct dev_iter_state):

struct dev_iter_state {
	struct seq_net_private p;
	unsigned int pos; /* bucket << BUCKET_SPACE + offset */
};

Create dev_seq_open_ops and use it so we don't have to expose
struct dev_iter_state.

[ Problem added by commit f04565ddf52e4 (dev: use name hash for
  dev_seq_ops) -Eric ]

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c            | 6 ++++++
 net/core/dev_addr_lists.c | 3 +--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 6ba50a1e404c..1482eea0bbf0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4282,6 +4282,12 @@ static int dev_seq_open(struct inode *inode, struct file *file)
 			    sizeof(struct dev_iter_state));
 }
 
+int dev_seq_open_ops(struct inode *inode, struct file *file,
+		     const struct seq_operations *ops)
+{
+	return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state));
+}
+
 static const struct file_operations dev_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = dev_seq_open,
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 277faef9148d..febba516db62 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -696,8 +696,7 @@ static const struct seq_operations dev_mc_seq_ops = {
 
 static int dev_mc_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
+	return dev_seq_open_ops(inode, file, &dev_mc_seq_ops);
 }
 
 static const struct file_operations dev_mc_seq_fops = {
-- 
cgit v1.2.3


From 2a38e6d5aed24bb7f0211e0819fac8c32c2b5791 Mon Sep 17 00:00:00 2001
From: Li Wei <lw@cn.fujitsu.com>
Date: Sun, 27 Nov 2011 21:33:34 +0000
Subject: ipv6: Set mcast_hops to IPV6_DEFAULT_MCASTHOPS when -1 was given.

We need to set np->mcast_hops to it's default value at this moment
otherwise when we use it and found it's value is -1, the logic to
get default hop limit doesn't take multicast into account and will
return wrong hop limit(IPV6_DEFAULT_HOPLIMIT) which is for unicast.

Signed-off-by: Li Wei <lw@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c99e3ee9781f..26cb08c84b74 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -503,7 +503,7 @@ done:
 			goto e_inval;
 		if (val > 255 || val < -1)
 			goto e_inval;
-		np->mcast_hops = val;
+		np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
 		retv = 0;
 		break;
 
-- 
cgit v1.2.3


From c89304b8ea34ab48ba6ae10e06a8b1b8c8212307 Mon Sep 17 00:00:00 2001
From: Xi Wang <xi.wang@gmail.com>
Date: Tue, 29 Nov 2011 09:26:30 +0000
Subject: sctp: better integer overflow check in sctp_auth_create_key()

The check from commit 30c2235c is incomplete and cannot prevent
cases like key_len = 0x80000000 (INT_MAX + 1).  In that case, the
left-hand side of the check (INT_MAX - key_len), which is unsigned,
becomes 0xffffffff (UINT_MAX) and bypasses the check.

However this shouldn't be a security issue.  The function is called
from the following two code paths:

 1) setsockopt()

 2) sctp_auth_asoc_set_secret()

In case (1), sca_keylength is never going to exceed 65535 since it's
bounded by a u16 from the user API.  As such, the key length will
never overflow.

In case (2), sca_keylength is computed based on the user key (1 short)
and 2 * key_vector (3 shorts) for a total of 7 * USHRT_MAX, which still
will not overflow.

In other words, this overflow check is not really necessary.  Just
make it more correct.

Signed-off-by: Xi Wang <xi.wang@gmail.com>
Cc: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/auth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 865e68fef21c..bf812048cf6f 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -82,7 +82,7 @@ static struct sctp_auth_bytes *sctp_auth_create_key(__u32 key_len, gfp_t gfp)
 	struct sctp_auth_bytes *key;
 
 	/* Verify that we are not going to overflow INT_MAX */
-	if ((INT_MAX - key_len) < sizeof(struct sctp_auth_bytes))
+	if (key_len > (INT_MAX - sizeof(struct sctp_auth_bytes)))
 		return NULL;
 
 	/* Allocate the shared key */
-- 
cgit v1.2.3


From 1281bc256543eb610b14fa95ce1967397931a120 Mon Sep 17 00:00:00 2001
From: Paul Moore <pmoore@redhat.com>
Date: Tue, 29 Nov 2011 10:10:54 +0000
Subject: netlabel: Fix build problems when IPv6 is not enabled

A recent fix to the the NetLabel code caused build problem with
configurations that did not have IPv6 enabled; see below:

 netlabel_kapi.c: In function 'netlbl_cfg_unlbl_map_add':
 netlabel_kapi.c:165:4:
  error: implicit declaration of function 'netlbl_af6list_add'

This patch fixes this problem by making the IPv6 specific code conditional
on the IPv6 configuration flags as we done in the rest of NetLabel and the
network stack as a whole.  We have to move some variable declarations
around as a result so things may not be quite as pretty, but at least it
builds cleanly now.

Some additional IPv6 conditionals were added to the NetLabel code as well
for the sake of consistency.

Reported-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Paul Moore <pmoore@redhat.com>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_kapi.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 7b372a763c60..824f184f7a9b 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -111,8 +111,6 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
 	struct netlbl_domaddr_map *addrmap = NULL;
 	struct netlbl_domaddr4_map *map4 = NULL;
 	struct netlbl_domaddr6_map *map6 = NULL;
-	const struct in_addr *addr4, *mask4;
-	const struct in6_addr *addr6, *mask6;
 
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (entry == NULL)
@@ -133,9 +131,9 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
 		INIT_LIST_HEAD(&addrmap->list6);
 
 		switch (family) {
-		case AF_INET:
-			addr4 = addr;
-			mask4 = mask;
+		case AF_INET: {
+			const struct in_addr *addr4 = addr;
+			const struct in_addr *mask4 = mask;
 			map4 = kzalloc(sizeof(*map4), GFP_ATOMIC);
 			if (map4 == NULL)
 				goto cfg_unlbl_map_add_failure;
@@ -148,9 +146,11 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
 			if (ret_val != 0)
 				goto cfg_unlbl_map_add_failure;
 			break;
-		case AF_INET6:
-			addr6 = addr;
-			mask6 = mask;
+			}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		case AF_INET6: {
+			const struct in6_addr *addr6 = addr;
+			const struct in6_addr *mask6 = mask;
 			map6 = kzalloc(sizeof(*map6), GFP_ATOMIC);
 			if (map6 == NULL)
 				goto cfg_unlbl_map_add_failure;
@@ -167,6 +167,8 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
 			if (ret_val != 0)
 				goto cfg_unlbl_map_add_failure;
 			break;
+			}
+#endif /* IPv6 */
 		default:
 			goto cfg_unlbl_map_add_failure;
 			break;
@@ -225,9 +227,11 @@ int netlbl_cfg_unlbl_static_add(struct net *net,
 	case AF_INET:
 		addr_len = sizeof(struct in_addr);
 		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case AF_INET6:
 		addr_len = sizeof(struct in6_addr);
 		break;
+#endif /* IPv6 */
 	default:
 		return -EPFNOSUPPORT;
 	}
@@ -266,9 +270,11 @@ int netlbl_cfg_unlbl_static_del(struct net *net,
 	case AF_INET:
 		addr_len = sizeof(struct in_addr);
 		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case AF_INET6:
 		addr_len = sizeof(struct in6_addr);
 		break;
+#endif /* IPv6 */
 	default:
 		return -EPFNOSUPPORT;
 	}
-- 
cgit v1.2.3


From f7e57044eeb1841847c24aa06766c8290c202583 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 30 Nov 2011 04:08:58 +0000
Subject: sch_teql: fix lockdep splat

We need rcu_read_lock() protection before using dst_get_neighbour(), and
we must cache its value (pass it to __teql_resolve())

teql_master_xmit() is called under rcu_read_lock_bh() protection, its
not enough.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_teql.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index a3b7120fcc74..4f4c52c0eeb3 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -225,11 +225,11 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 
 
 static int
-__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
+__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
+	       struct net_device *dev, struct netdev_queue *txq,
+	       struct neighbour *mn)
 {
-	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
-	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
-	struct neighbour *mn = dst_get_neighbour(skb_dst(skb));
+	struct teql_sched_data *q = qdisc_priv(txq->qdisc);
 	struct neighbour *n = q->ncache;
 
 	if (mn->tbl == NULL)
@@ -262,17 +262,26 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
 }
 
 static inline int teql_resolve(struct sk_buff *skb,
-			       struct sk_buff *skb_res, struct net_device *dev)
+			       struct sk_buff *skb_res,
+			       struct net_device *dev,
+			       struct netdev_queue *txq)
 {
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
+	struct dst_entry *dst = skb_dst(skb);
+	struct neighbour *mn;
+	int res;
+
 	if (txq->qdisc == &noop_qdisc)
 		return -ENODEV;
 
-	if (dev->header_ops == NULL ||
-	    skb_dst(skb) == NULL ||
-	    dst_get_neighbour(skb_dst(skb)) == NULL)
+	if (!dev->header_ops || !dst)
 		return 0;
-	return __teql_resolve(skb, skb_res, dev);
+
+	rcu_read_lock();
+	mn = dst_get_neighbour(dst);
+	res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0;
+	rcu_read_unlock();
+
+	return res;
 }
 
 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -307,7 +316,7 @@ restart:
 			continue;
 		}
 
-		switch (teql_resolve(skb, skb_res, slave)) {
+		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
 		case 0:
 			if (__netif_tx_trylock(slave_txq)) {
 				unsigned int length = qdisc_pkt_len(skb);
-- 
cgit v1.2.3


From 218fa90f072e4aeff9003d57e390857f4f35513e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 29 Nov 2011 20:05:55 +0000
Subject: ipv4: fix lockdep splat in rt_cache_seq_show

After commit f2c31e32b378 (fix NULL dereferences in check_peer_redir()),
dst_get_neighbour() should be guarded by rcu_read_lock() /
rcu_read_unlock() section.

Reported-by: Miles Lane <miles.lane@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5c2847247f51..57e01bc60947 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -417,9 +417,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 	else {
 		struct rtable *r = v;
 		struct neighbour *n;
-		int len;
+		int len, HHUptod;
 
+		rcu_read_lock();
 		n = dst_get_neighbour(&r->dst);
+		HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0;
+		rcu_read_unlock();
+
 		seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
 			      "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
 			r->dst.dev ? r->dst.dev->name : "*",
@@ -433,7 +437,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 			      dst_metric(&r->dst, RTAX_RTTVAR)),
 			r->rt_key_tos,
 			-1,
-			(n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0,
+			HHUptod,
 			r->rt_spec_dst, &len);
 
 		seq_printf(seq, "%*s\n", 127 - len, "");
-- 
cgit v1.2.3


From 8f891489866ec62a87494eff3ed17c88152c32d4 Mon Sep 17 00:00:00 2001
From: "RongQing.Li" <roy.qing.li@gmail.com>
Date: Wed, 30 Nov 2011 23:43:07 -0500
Subject: net/core: fix rollback handler in register_netdevice_notifier

Within nested statements, the break statement terminates only the
do, for, switch, or while statement that immediately encloses it,
So replace the break with goto.

Signed-off-by: RongQing.Li <roy.qing.li@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 1482eea0bbf0..5a13edfc9f73 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1396,7 +1396,7 @@ rollback:
 	for_each_net(net) {
 		for_each_netdev(net, dev) {
 			if (dev == last)
-				break;
+				goto outroll;
 
 			if (dev->flags & IFF_UP) {
 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
@@ -1407,6 +1407,7 @@ rollback:
 		}
 	}
 
+outroll:
 	raw_notifier_chain_unregister(&netdev_chain, nb);
 	goto unlock;
 }
-- 
cgit v1.2.3


From efbc368dcc6426d5430b9b8eeda944cf2cb74b8c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 1 Dec 2011 13:38:59 -0500
Subject: ipv4: Perform peer validation on cached route lookup.

Otherwise we won't notice the peer GENID change.

Reported-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 57e01bc60947..ca5e237df029 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1693,12 +1693,8 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 }
 
 
-static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
+static struct rtable *ipv4_validate_peer(struct rtable *rt)
 {
-	struct rtable *rt = (struct rtable *) dst;
-
-	if (rt_is_expired(rt))
-		return NULL;
 	if (rt->rt_peer_genid != rt_peer_genid()) {
 		struct inet_peer *peer;
 
@@ -1707,19 +1703,29 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 
 		peer = rt->peer;
 		if (peer) {
-			check_peer_pmtu(dst, peer);
+			check_peer_pmtu(&rt->dst, peer);
 
 			if (peer->redirect_genid != redirect_genid)
 				peer->redirect_learned.a4 = 0;
 			if (peer->redirect_learned.a4 &&
 			    peer->redirect_learned.a4 != rt->rt_gateway) {
-				if (check_peer_redir(dst, peer))
+				if (check_peer_redir(&rt->dst, peer))
 					return NULL;
 			}
 		}
 
 		rt->rt_peer_genid = rt_peer_genid();
 	}
+	return rt;
+}
+
+static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
+{
+	struct rtable *rt = (struct rtable *) dst;
+
+	if (rt_is_expired(rt))
+		return NULL;
+	dst = (struct dst_entry *) ipv4_validate_peer(rt);
 	return dst;
 }
 
@@ -2374,6 +2380,9 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		    rth->rt_mark == skb->mark &&
 		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
+			rth = ipv4_validate_peer(rth);
+			if (!rth)
+				continue;
 			if (noref) {
 				dst_use_noref(&rth->dst, jiffies);
 				skb_dst_set_noref(skb, &rth->dst);
@@ -2749,6 +2758,9 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
 			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
 		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
+			rth = ipv4_validate_peer(rth);
+			if (!rth)
+				continue;
 			dst_use(&rth->dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
-- 
cgit v1.2.3


From b03b6dd58cef7d15b7c46a6729b83dd535ef08ab Mon Sep 17 00:00:00 2001
From: Vitalii Demianets <vitas@nppfactor.kiev.ua>
Date: Fri, 25 Nov 2011 00:16:37 +0000
Subject: bridge: master device stuck in no-carrier state forever when in
 user-stp mode

When in user-stp mode, bridge master do not follow state of its slaves, so
after the following sequence of events it can stuck forever in no-carrier
state:
1) turn stp off
2) put all slaves down - master device will follow their state and also go in
no-carrier state
3) turn stp on with bridge-stp script returning 0 (go to the user-stp mode)
Now bridge master won't follow slaves' state and will never reach running
state.

This patch solves the problem by making user-stp and kernel-stp behavior
similar regarding master following slaves' states.

Signed-off-by: Vitalii Demianets <vitas@nppfactor.kiev.ua>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c |  6 ++++++
 net/bridge/br_stp.c     | 29 ++++++++++++++---------------
 2 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e5f9ece3c9a0..a1daf8227ed1 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -18,6 +18,7 @@
 #include <net/sock.h>
 
 #include "br_private.h"
+#include "br_private_stp.h"
 
 static inline size_t br_nlmsg_size(void)
 {
@@ -188,6 +189,11 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 
 	p->state = new_state;
 	br_log_state(p);
+
+	spin_lock_bh(&p->br->lock);
+	br_port_state_selection(p->br);
+	spin_unlock_bh(&p->br->lock);
+
 	br_ifinfo_notify(RTM_NEWLINK, p);
 
 	return 0;
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index ad0a3f7cf6cc..dd147d78a588 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -399,25 +399,24 @@ void br_port_state_selection(struct net_bridge *br)
 	struct net_bridge_port *p;
 	unsigned int liveports = 0;
 
-	/* Don't change port states if userspace is handling STP */
-	if (br->stp_enabled == BR_USER_STP)
-		return;
-
 	list_for_each_entry(p, &br->port_list, list) {
 		if (p->state == BR_STATE_DISABLED)
 			continue;
 
-		if (p->port_no == br->root_port) {
-			p->config_pending = 0;
-			p->topology_change_ack = 0;
-			br_make_forwarding(p);
-		} else if (br_is_designated_port(p)) {
-			del_timer(&p->message_age_timer);
-			br_make_forwarding(p);
-		} else {
-			p->config_pending = 0;
-			p->topology_change_ack = 0;
-			br_make_blocking(p);
+		/* Don't change port states if userspace is handling STP */
+		if (br->stp_enabled != BR_USER_STP) {
+			if (p->port_no == br->root_port) {
+				p->config_pending = 0;
+				p->topology_change_ack = 0;
+				br_make_forwarding(p);
+			} else if (br_is_designated_port(p)) {
+				del_timer(&p->message_age_timer);
+				br_make_forwarding(p);
+			} else {
+				p->config_pending = 0;
+				p->topology_change_ack = 0;
+				br_make_blocking(p);
+			}
 		}
 
 		if (p->state == BR_STATE_FORWARDING)
-- 
cgit v1.2.3


From 59c2cdae2791c0b2ee13d148edc6b771e7e7953f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 1 Dec 2011 14:12:55 -0500
Subject: Revert "udp: remove redundant variable"

This reverts commit 81d54ec8479a2c695760da81f05b5a9fb2dbe40a.

If we take the "try_again" goto, due to a checksum error,
the 'len' has already been truncated.  So we won't compute
the same values as the original code did.

Reported-by: paul bilke <fsmail@conspiracy.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 15 ++++++++-------
 net/ipv6/udp.c | 15 ++++++++-------
 2 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ab0966df1e2a..5a65eeac1d29 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1164,7 +1164,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	struct inet_sock *inet = inet_sk(sk);
 	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
 	struct sk_buff *skb;
-	unsigned int ulen;
+	unsigned int ulen, copied;
 	int peeked;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
@@ -1186,9 +1186,10 @@ try_again:
 		goto out;
 
 	ulen = skb->len - sizeof(struct udphdr);
-	if (len > ulen)
-		len = ulen;
-	else if (len < ulen)
+	copied = len;
+	if (copied > ulen)
+		copied = ulen;
+	else if (copied < ulen)
 		msg->msg_flags |= MSG_TRUNC;
 
 	/*
@@ -1197,14 +1198,14 @@ try_again:
 	 * coverage checksum (UDP-Lite), do it before the copy.
 	 */
 
-	if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
+	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
 		if (udp_lib_checksum_complete(skb))
 			goto csum_copy_err;
 	}
 
 	if (skb_csum_unnecessary(skb))
 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
-					      msg->msg_iov, len);
+					      msg->msg_iov, copied);
 	else {
 		err = skb_copy_and_csum_datagram_iovec(skb,
 						       sizeof(struct udphdr),
@@ -1233,7 +1234,7 @@ try_again:
 	if (inet->cmsg_flags)
 		ip_cmsg_recv(msg, skb);
 
-	err = len;
+	err = copied;
 	if (flags & MSG_TRUNC)
 		err = ulen;
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 846f4757eb8d..8c2541915183 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -340,7 +340,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
 	struct sk_buff *skb;
-	unsigned int ulen;
+	unsigned int ulen, copied;
 	int peeked;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
@@ -363,9 +363,10 @@ try_again:
 		goto out;
 
 	ulen = skb->len - sizeof(struct udphdr);
-	if (len > ulen)
-		len = ulen;
-	else if (len < ulen)
+	copied = len;
+	if (copied > ulen)
+		copied = ulen;
+	else if (copied < ulen)
 		msg->msg_flags |= MSG_TRUNC;
 
 	is_udp4 = (skb->protocol == htons(ETH_P_IP));
@@ -376,14 +377,14 @@ try_again:
 	 * coverage checksum (UDP-Lite), do it before the copy.
 	 */
 
-	if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
+	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
 		if (udp_lib_checksum_complete(skb))
 			goto csum_copy_err;
 	}
 
 	if (skb_csum_unnecessary(skb))
 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
-					      msg->msg_iov,len);
+					      msg->msg_iov, copied       );
 	else {
 		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
 		if (err == -EINVAL)
@@ -432,7 +433,7 @@ try_again:
 			datagram_recv_ctl(sk, msg, skb);
 	}
 
-	err = len;
+	err = copied;
 	if (flags & MSG_TRUNC)
 		err = ulen;
 
-- 
cgit v1.2.3


From 1ee5fa1e9970a16036e37c7b9d5ce81c778252fc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 1 Dec 2011 11:06:34 +0000
Subject: sch_red: fix red_change
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Le mercredi 30 novembre 2011 à 14:36 -0800, Stephen Hemminger a écrit :

> (Almost) nobody uses RED because they can't figure it out.
> According to Wikipedia, VJ says that:
>  "there are not one, but two bugs in classic RED."

RED is useful for high throughput routers, I doubt many linux machines
act as such devices.

I was considering adding Adaptative RED (Sally Floyd, Ramakrishna
Gummadi, Scott Shender), August 2001

In this version, maxp is dynamic (from 1% to 50%), and user only have to
setup min_th (target average queue size)
(max_th and wq (burst in linux RED) are automatically setup)

By the way it seems we have a small bug in red_change()

if (skb_queue_empty(&sch->q))
	red_end_of_idle_period(&q->parms);

First, if queue is empty, we should call
red_start_of_idle_period(&q->parms);

Second, since we dont use anymore sch->q, but q->qdisc, the test is
meaningless.

Oh well...

[PATCH] sch_red: fix red_change()

Now RED is classful, we must check q->qdisc->q.qlen, and if queue is empty,
we start an idle period, not end it.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_red.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 6649463da1b6..d617161f8dd3 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -209,8 +209,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 				 ctl->Plog, ctl->Scell_log,
 				 nla_data(tb[TCA_RED_STAB]));
 
-	if (skb_queue_empty(&sch->q))
-		red_end_of_idle_period(&q->parms);
+	if (!q->qdisc->q.qlen)
+		red_start_of_idle_period(&q->parms);
 
 	sch_tree_unlock(sch);
 	return 0;
-- 
cgit v1.2.3


From d01ff0a049f749e0bf10a35bb23edd012718c8c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20Pan=28=E6=BD=98=E5=8D=AB=E5=B9=B3=29?=
 <panweiping3@gmail.com>
Date: Thu, 1 Dec 2011 15:47:06 +0000
Subject: ipv4: flush route cache after change accept_local

After reset ipv4_devconf->data[IPV4_DEVCONF_ACCEPT_LOCAL] to 0,
we should flush route cache, or it will continue receive packets with local
source address, which should be dropped.

Signed-off-by: Weiping Pan <panweiping3@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index c6b5092f29a1..65f01dc47565 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1490,7 +1490,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 			     void __user *buffer,
 			     size_t *lenp, loff_t *ppos)
 {
+	int old_value = *(int *)ctl->data;
 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+	int new_value = *(int *)ctl->data;
 
 	if (write) {
 		struct ipv4_devconf *cnf = ctl->extra1;
@@ -1501,6 +1503,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 
 		if (cnf == net->ipv4.devconf_dflt)
 			devinet_copy_dflt_conf(net, i);
+		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
+			if ((new_value == 0) && (old_value != 0))
+				rt_cache_flush(net, 0);
 	}
 
 	return ret;
-- 
cgit v1.2.3


From 3ced1be5490f5c415d51a1e5918beeb9239d546b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 1 Dec 2011 22:19:01 -0500
Subject: netfilter: Remove ADVANCED dependency from NF_CONNTRACK_NETBIOS_NS

firewalld in Fedora 16 needs this.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e8f379692294..d5597b759ba3 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -201,7 +201,6 @@ config NF_CONNTRACK_BROADCAST
 
 config NF_CONNTRACK_NETBIOS_NS
 	tristate "NetBIOS name service protocol support"
-	depends on NETFILTER_ADVANCED
 	select NF_CONNTRACK_BROADCAST
 	help
 	  NetBIOS name service requests are sent as broadcast messages from an
-- 
cgit v1.2.3